Files
doris/be/src/util/load_error_hub.cpp
Mingyu Chen a51ce03595 Enhance the usability of Load operation (#490)
1. Add broker load error hub
A broker load error hub collects error messages during the load process and saves them as a file to the specified remote storage via the broker. This is needed because, in a broker/mini/streaming load, the user may not be able to access the error log file on the Backend directly.
We also add a new header option, 'enable_hub', to the streaming load request; its default is false. Enabling the broker load error hub significantly slows down the processing speed of streaming load, because it has to access remote storage via the broker. Users can therefore disable the load error hub with this header option to avoid slowing down the load.

2. Show load error logs by using SHOW LOAD WARNINGS stmt
We also provide an easier way to get load error logs. We implement the 'SHOW LOAD WARNINGS ON 'url'' stmt to show load error logs directly. The 'url' in the stmt is the one provided by the 'SHOW LOAD' stmt.
eg:
show load warnings on "http://192.168.1.1:8040/api/_load_error_log?file=__shard_2/error_log_xxx";

3. Support now() function in broker load
Users can map a column to now() in a broker load stmt, which means the column will be filled with the time at which the ETL started.

4. Support more types of wildcard in broker load
Currently, we only support the wildcard '*' for matching file names; wildcards like '/path/to/20190[1-4]*' are not supported.
2019-01-03 19:07:27 +08:00

77 lines
2.5 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "util/load_error_hub.h"

#include <algorithm>
#include <memory>
#include <sstream>
#include <string>
#include <utility>

#include <thrift/protocol/TDebugProtocol.h>

#include "util/mysql_load_error_hub.h"
#include "util/broker_load_error_hub.h"
#include "util/null_load_error_hub.h"
#include "gen_cpp/PaloInternalService_types.h"
namespace doris {
// Creates the LoadErrorHub implementation selected by `t_hub_info`, calls
// prepare() on it, and hands ownership to the caller through `hub`.
//
// Parameters:
//   env                 - forwarded to BrokerLoadErrorHub; not used for the
//                         other hub types.
//   t_hub_info          - description of the hub to create; nullptr selects a
//                         NullLoadErrorHub.
//   error_log_file_name - forwarded to BrokerLoadErrorHub after every '/' has
//                         been replaced with '_'.
//   hub                 - output parameter, must not be null. It is written
//                         only when the function succeeds.
//
// Returns Status::OK on success. Returns an error Status, leaving `*hub`
// untouched, when `t_hub_info->type` is not one of the handled hub types.
Status LoadErrorHub::create_hub(
        ExecEnv* env,
        const TLoadErrorHubInfo* t_hub_info,
        const std::string& error_log_file_name,
        std::unique_ptr<LoadErrorHub>* hub) {
    // The new hub is owned by a smart pointer from the moment it is allocated,
    // so it cannot leak on any exit path (e.g. should prepare() throw).
    std::unique_ptr<LoadErrorHub> new_hub;

    if (t_hub_info == nullptr) {
        // No hub description supplied: fall back to the null hub.
        new_hub.reset(new NullLoadErrorHub());
    } else {
        VLOG_ROW << "create_hub: " << apache::thrift::ThriftDebugString(*t_hub_info).c_str();

        switch (t_hub_info->type) {
        case TErrorHubType::MYSQL:
            new_hub.reset(new MysqlLoadErrorHub(t_hub_info->mysql_info));
            break;
        case TErrorHubType::BROKER: {
            // The original file name may contain a directory part such as
            // "__shard_0/xxx"; replace every '/' with '_'.
            std::string copied_name(error_log_file_name);
            std::replace(copied_name.begin(), copied_name.end(), '/', '_');
            new_hub.reset(new BrokerLoadErrorHub(env, t_hub_info->broker_info, copied_name));
            break;
        }
        case TErrorHubType::NULL_TYPE:
            new_hub.reset(new NullLoadErrorHub());
            break;
        default: {
            // Scoped so that the local declaration stays legal if more cases
            // are ever added after this label.
            std::stringstream err;
            err << "Unknown hub type." << t_hub_info->type;
            return Status(err.str());
        }
        }
    }

    // NOTE(review): whatever prepare() returns is discarded here; confirm
    // whether a preparation failure should be propagated to the caller.
    new_hub->prepare();
    *hub = std::move(new_hub);
    return Status::OK;
}
} // end namespace doris