Modify the implementation of MemTracker: 1. Simplify a lot of useless logic; 2. Add MemTrackerTaskPool as the ancestor of all query and import trackers; it is used to track the local memory usage of all executing tasks; 3. Add a consume/release cache, which triggers a consume/release when the accumulated memory exceeds the parameter mem_tracker_consume_min_size_bytes; 4. Add a new memory leak detection mode (experimental feature) that throws an exception when the remaining statistical value is greater than the specified range when the MemTracker is destructed, and prints the accurate statistical value in HTTP; it is controlled by the parameter memory_leak_detection; 5. Add Virtual MemTracker, whose consume/release is not synced to its parent. It will be used when the TCMalloc Hook is introduced to record memory later, to record the specified memory independently; 6. Modify the GC logic: register the buffer cached in DiskIoMgr as a GC function, and add other GC functions later; 7. Change the global root node from Root MemTracker to Process MemTracker, and remove Process MemTracker from exec_env; 8. Modify the macro that detects whether the memory has reached the upper limit, modify the parameters and default behavior of creating a MemTracker, modify the error message format in mem_limit_exceeded, extend and apply transfer_to, remove Metric from MemTracker, etc. Modify where MemTracker is used: 1. MemPool adds a constructor that creates a temporary tracker to avoid a lot of redundant code; 2. Add trackers for global objects such as ChunkAllocator and StorageEngine; 3. Add more fine-grained trackers such as ExprContext; 4. RuntimeState removes FragmentMemTracker, that is, the PlanFragmentExecutor mem_tracker, which was previously used for independent statistics of scan process memory, and replaces it with _scanner_mem_tracker in OlapScanNode; 5. MemTracker is no longer recorded in ReservationTracker, and ReservationTracker will be removed later.
465 lines
20 KiB
C++
465 lines
20 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#include "exec/orc_scanner.h"
|
|
|
|
#include "exec/broker_reader.h"
|
|
#include "exec/buffered_reader.h"
|
|
#include "exec/local_file_reader.h"
|
|
#include "exec/s3_reader.h"
|
|
#include "exprs/expr.h"
|
|
#include "runtime/descriptors.h"
|
|
#include "runtime/exec_env.h"
|
|
#include "runtime/raw_value.h"
|
|
#include "runtime/runtime_state.h"
|
|
#include "runtime/tuple.h"
|
|
|
|
#if defined(__x86_64__)
|
|
#include "exec/hdfs_file_reader.h"
|
|
#endif
|
|
|
|
// The public ORC headers do not declare orc::TimezoneError, so it is
// re-declared here by hand to make it catchable in this file. The declaration
// mirrors the upstream one at
// https://github.com/apache/orc/blob/84353fbfc447b06e0924024a8e03c1aaebd3e7a5/c%2B%2B/src/Timezone.hh#L104-L109
namespace orc {

class TimezoneError : public std::runtime_error {
public:
    TimezoneError(const std::string& message);
    TimezoneError(const TimezoneError& other);
    ~TimezoneError() noexcept override;
};

} // namespace orc
|
|
|
|
namespace doris {
|
|
|
|
class ORCFileStream : public orc::InputStream {
|
|
public:
|
|
ORCFileStream(FileReader* file, std::string filename)
|
|
: _file(file), _filename(std::move(filename)) {}
|
|
|
|
~ORCFileStream() override {
|
|
if (_file != nullptr) {
|
|
_file->close();
|
|
delete _file;
|
|
_file = nullptr;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get the total length of the file in bytes.
|
|
*/
|
|
uint64_t getLength() const override { return _file->size(); }
|
|
|
|
/**
|
|
* Get the natural size for reads.
|
|
* @return the number of bytes that should be read at once
|
|
*/
|
|
uint64_t getNaturalReadSize() const override { return 128 * 1024; }
|
|
|
|
/**
|
|
* Read length bytes from the file starting at offset into
|
|
* the buffer starting at buf.
|
|
* @param buf the starting position of a buffer.
|
|
* @param length the number of bytes to read.
|
|
* @param offset the position in the stream to read from.
|
|
*/
|
|
void read(void* buf, uint64_t length, uint64_t offset) override {
|
|
if (buf == nullptr) {
|
|
throw orc::ParseError("Buffer is null");
|
|
}
|
|
|
|
int64_t bytes_read = 0;
|
|
int64_t reads = 0;
|
|
while (bytes_read < length) {
|
|
Status result = _file->readat(offset, length - bytes_read, &reads, buf);
|
|
if (!result.ok()) {
|
|
throw orc::ParseError("Bad read of " + _filename);
|
|
}
|
|
if (reads == 0) {
|
|
break;
|
|
}
|
|
bytes_read += reads; // total read bytes
|
|
offset += reads;
|
|
buf = (char*)buf + reads;
|
|
}
|
|
if (length != bytes_read) {
|
|
throw orc::ParseError("Short read of " + _filename +
|
|
". expected :" + std::to_string(length) +
|
|
", actual : " + std::to_string(bytes_read));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get the name of the stream for error messages.
|
|
*/
|
|
const std::string& getName() const override { return _filename; }
|
|
|
|
private:
|
|
FileReader* _file;
|
|
std::string _filename;
|
|
};
|
|
|
|
ORCScanner::ORCScanner(RuntimeState* state, RuntimeProfile* profile,
|
|
const TBrokerScanRangeParams& params,
|
|
const std::vector<TBrokerRangeDesc>& ranges,
|
|
const std::vector<TNetworkAddress>& broker_addresses,
|
|
const std::vector<TExpr>& pre_filter_texprs,
|
|
ScannerCounter* counter)
|
|
: BaseScanner(state, profile, params, pre_filter_texprs, counter),
|
|
_ranges(ranges),
|
|
_broker_addresses(broker_addresses),
|
|
// _splittable(params.splittable),
|
|
_next_range(0),
|
|
_cur_file_eof(true),
|
|
_total_groups(0),
|
|
_current_group(0),
|
|
_rows_of_group(0),
|
|
_current_line_of_group(0) {}
|
|
|
|
// Tears the scanner down by delegating to close().
ORCScanner::~ORCScanner() {
    ORCScanner::close();
}
|
|
|
|
// Opens the base scanner and, when there is at least one range, configures
// the ORC row reader options to include only the columns read from the file.
//
// The number of file columns comes from the first range when it is set there,
// otherwise every source slot is treated as a file column.
//
// @return the error from BaseScanner::open(), or OK.
Status ORCScanner::open() {
    RETURN_IF_ERROR(BaseScanner::open());
    if (_ranges.empty()) {
        return Status::OK();
    }

    // Bind by reference: only two fields are read, no need to copy the struct.
    const TBrokerRangeDesc& first_range = _ranges[0];
    _num_of_columns_from_file = first_range.__isset.num_of_columns_from_file
                                        ? first_range.num_of_columns_from_file
                                        : _src_slot_descs.size();

    std::list<std::string> include_cols;
    for (int i = 0; i < _num_of_columns_from_file; ++i) {
        include_cols.push_back(_src_slot_descs.at(i)->col_name());
    }
    _row_reader_options.include(include_cols);

    return Status::OK();
}
|
|
|
|
// Reads one row from the current ORC file, converts every file column to its
// textual form in _src_tuple, and then materializes the destination tuple.
//
// Files are opened lazily through open_next_reader(); each stripe is loaded as
// one row batch and rows are consumed from that batch one call at a time.
//
// @param tuple      destination tuple passed on to fill_dest_tuple().
// @param tuple_pool pool that backs the string data written into the slots.
// @param eof        set to true when all ranges have been consumed.
// @param fill_tuple set to _success after fill_dest_tuple() ran.
// @return OK on success or end of input; InternalError for a null value in a
//         non-nullable column, an unsupported ORC type, a value that cannot be
//         formatted, or an ORC ParseError / InvalidArgument / TimezoneError.
Status ORCScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) {
    try {
        SCOPED_TIMER(_read_timer);
        // Get one line
        while (!_scanner_eof) {
            if (_cur_file_eof) {
                RETURN_IF_ERROR(open_next_reader());
                if (_scanner_eof) {
                    *eof = true;
                    return Status::OK();
                }
                _cur_file_eof = false;
            }
            if (_current_line_of_group >= _rows_of_group) { // read next stripe
                if (_current_group >= _total_groups) {
                    _cur_file_eof = true;
                    continue;
                }
                _rows_of_group = _reader->getStripe(_current_group)->getNumberOfRows();
                _batch = _row_reader->createRowBatch(_rows_of_group);
                _row_reader->next(*_batch.get());

                _current_line_of_group = 0;
                ++_current_group;
            }

            const std::vector<orc::ColumnVectorBatch*>& batch_vec =
                    static_cast<orc::StructVectorBatch*>(_batch.get())->fields;
            const auto row = _current_line_of_group;
            for (int column_ipos = 0; column_ipos < _num_of_columns_from_file; ++column_ipos) {
                auto slot_desc = _src_slot_descs[column_ipos];
                const auto orc_pos = _position_in_orc_original[column_ipos];
                orc::ColumnVectorBatch* cvb = batch_vec[orc_pos];

                if (cvb->hasNulls && !cvb->notNull[row]) {
                    if (!slot_desc->is_nullable()) {
                        std::stringstream str_error;
                        str_error << "The field name(" << slot_desc->col_name()
                                  << ") is not nullable ";
                        LOG(WARNING) << str_error.str();
                        return Status::InternalError(str_error.str());
                    }
                    _src_tuple->set_null(slot_desc->null_indicator_offset());
                    continue;
                }

                if (slot_desc->is_nullable()) {
                    _src_tuple->set_not_null(slot_desc->null_indicator_offset());
                }
                void* slot = _src_tuple->get_slot(slot_desc->tuple_offset());
                StringValue* str_slot = reinterpret_cast<StringValue*>(slot);

                // Copies `len` bytes into pool-owned memory and points the slot at them.
                auto copy_to_slot = [str_slot, tuple_pool](const void* data, size_t len) {
                    str_slot->ptr = reinterpret_cast<char*>(tuple_pool->allocate(len));
                    memcpy(str_slot->ptr, data, len);
                    str_slot->len = len;
                };

                // Scratch space for integers and date/time text.
                char tmp_buf[128] = {0};

                switch (_row_reader->getSelectedType().getSubtype(orc_pos)->getKind()) {
                case orc::BOOLEAN: {
                    int64_t value = static_cast<orc::LongVectorBatch*>(cvb)->data[row];
                    if (value == 0) {
                        copy_to_slot("false", 5);
                    } else {
                        copy_to_slot("true", 4);
                    }
                    break;
                }
                case orc::BYTE:
                case orc::INT:
                case orc::SHORT:
                case orc::LONG: {
                    int64_t value = static_cast<orc::LongVectorBatch*>(cvb)->data[row];
                    int written = snprintf(tmp_buf, sizeof(tmp_buf), "%lld",
                                           static_cast<long long>(value));
                    if (written < 0 || static_cast<size_t>(written) >= sizeof(tmp_buf)) {
                        std::stringstream str_error;
                        str_error << "The field name(" << slot_desc->col_name()
                                  << ") integer value can not be formatted. ";
                        LOG(WARNING) << str_error.str();
                        return Status::InternalError(str_error.str());
                    }
                    copy_to_slot(tmp_buf, written);
                    break;
                }
                case orc::FLOAT:
                case orc::DOUBLE: {
                    double value = static_cast<orc::DoubleVectorBatch*>(cvb)->data[row];
                    // "%.9f" prints every integer digit: the largest double
                    // needs sign + 309 digits + '.' + 9 decimals, which does
                    // not fit in the 128-byte scratch buffer. Use a dedicated,
                    // bounded buffer instead.
                    char double_buf[384];
                    int written = snprintf(double_buf, sizeof(double_buf), "%.9f", value);
                    if (written < 0 || static_cast<size_t>(written) >= sizeof(double_buf)) {
                        std::stringstream str_error;
                        str_error << "The field name(" << slot_desc->col_name()
                                  << ") floating point value can not be formatted. ";
                        LOG(WARNING) << str_error.str();
                        return Status::InternalError(str_error.str());
                    }
                    copy_to_slot(double_buf, written);
                    break;
                }
                case orc::BINARY:
                case orc::CHAR:
                case orc::VARCHAR:
                case orc::STRING: {
                    auto* string_batch = static_cast<orc::StringVectorBatch*>(cvb);
                    copy_to_slot(string_batch->data[row], string_batch->length[row]);
                    break;
                }
                case orc::DECIMAL: {
                    int precision = static_cast<orc::Decimal64VectorBatch*>(cvb)->precision;
                    int scale = static_cast<orc::Decimal64VectorBatch*>(cvb)->scale;

                    // Decimal64VectorBatch handles decimal columns with precision no greater than 18.
                    // Decimal128VectorBatch handles the others.
                    std::string decimal_str;
                    if (precision <= 18) {
                        decimal_str = std::to_string(
                                static_cast<orc::Decimal64VectorBatch*>(cvb)->values[row]);
                    } else {
                        decimal_str =
                                static_cast<orc::Decimal128VectorBatch*>(cvb)->values[row].toString();
                    }

                    // decimal_str is the unscaled integer; insert the decimal point.
                    int negative = decimal_str[0] == '-' ? 1 : 0;
                    int decimal_scale_length = decimal_str.size() - negative;

                    std::string v;
                    if (decimal_scale_length <= scale) {
                        // decimal(5,2): the unscaled integer of 0.01 is 1, so
                        // zeros have to be filled in before the digits.
                        v = negative ? "-0." : "0.";
                        v.append(scale - decimal_scale_length, '0');
                        v += decimal_str.substr(negative);
                    } else {
                        // The ORC API pads with trailing zeros, so here the
                        // digit count is greater than the scale.
                        v = decimal_str.substr(0, decimal_str.size() - scale) + "." +
                            decimal_str.substr(decimal_str.size() - scale);
                    }

                    copy_to_slot(v.c_str(), v.size());
                    break;
                }
                case orc::DATE: {
                    // Date columns record the number of days since the UNIX epoch (1/1/1970 in UTC).
                    int64_t timestamp =
                            static_cast<orc::LongVectorBatch*>(cvb)->data[row] * 24 * 60 * 60;
                    DateTimeValue dtv;
                    if (!dtv.from_unixtime(timestamp, "UTC")) {
                        std::stringstream str_error;
                        str_error << "Parse timestamp (" + std::to_string(timestamp) + ") error";
                        LOG(WARNING) << str_error.str();
                        return Status::InternalError(str_error.str());
                    }
                    dtv.cast_to_date();
                    // NOTE(review): the "- 1" presumably drops a terminating
                    // NUL that to_string() counts in its return value - confirm.
                    char* buf_end = dtv.to_string(tmp_buf);
                    copy_to_slot(tmp_buf, buf_end - tmp_buf - 1);
                    break;
                }
                case orc::TIMESTAMP: {
                    // The time zone of an ORC timestamp is stored inside the stripe information,
                    // so the value obtained here is already offset; parsing it as UTC yields
                    // the actual datetime literal.
                    int64_t timestamp = static_cast<orc::TimestampVectorBatch*>(cvb)->data[row];
                    DateTimeValue dtv;
                    if (!dtv.from_unixtime(timestamp, "UTC")) {
                        std::stringstream str_error;
                        str_error << "Parse timestamp (" + std::to_string(timestamp) + ") error";
                        LOG(WARNING) << str_error.str();
                        return Status::InternalError(str_error.str());
                    }
                    // NOTE(review): same "- 1" assumption as in the DATE case.
                    char* buf_end = dtv.to_string(tmp_buf);
                    copy_to_slot(tmp_buf, buf_end - tmp_buf - 1);
                    break;
                }
                default: {
                    std::stringstream str_error;
                    str_error << "The field name(" << slot_desc->col_name()
                              << ") type not support. ";
                    LOG(WARNING) << str_error.str();
                    return Status::InternalError(str_error.str());
                }
                }
            }
            ++_current_line_of_group;

            // range of current file
            const TBrokerRangeDesc& range = _ranges.at(_next_range - 1);
            if (range.__isset.num_of_columns_from_file) {
                fill_slots_of_columns_from_path(range.num_of_columns_from_file,
                                                range.columns_from_path);
            }
            COUNTER_UPDATE(_rows_read_counter, 1);
            SCOPED_TIMER(_materialize_timer);
            RETURN_IF_ERROR(fill_dest_tuple(tuple, tuple_pool));
            *fill_tuple = _success;
            break;
        }
        return Status::OK();
    } catch (const orc::ParseError& e) {
        std::stringstream str_error;
        str_error << "ParseError : " << e.what();
        LOG(WARNING) << str_error.str();
        return Status::InternalError(str_error.str());
    } catch (const orc::InvalidArgument& e) {
        std::stringstream str_error;
        str_error << "ParseError : " << e.what();
        LOG(WARNING) << str_error.str();
        return Status::InternalError(str_error.str());
    } catch (const orc::TimezoneError& e) {
        std::stringstream str_error;
        str_error << "TimezoneError : " << e.what();
        LOG(WARNING) << str_error.str();
        return Status::InternalError(str_error.str());
    }
}
|
|
|
|
// Advances to the next non-empty range and prepares the ORC readers for it.
//
// For each remaining range a FileReader matching the range's file type is
// created and opened; empty files are skipped. For the first non-empty file
// the ORC reader and row reader are created, the stripe/row cursors are reset
// and _position_in_orc_original is rebuilt so that entry i holds the position,
// inside the ORC row batch, of the i-th included column.
//
// Sets _scanner_eof and returns OK when no range is left.
//
// @return OK on success or end of input; InternalError for an unknown file
//         type or a selected ORC column that cannot be mapped back to the
//         include list; or the error from opening the file.
Status ORCScanner::open_next_reader() {
    while (true) {
        if (_next_range >= _ranges.size()) {
            _scanner_eof = true;
            return Status::OK();
        }
        const TBrokerRangeDesc& range = _ranges[_next_range++];
        std::unique_ptr<FileReader> file_reader;
        switch (range.file_type) {
        case TFileType::FILE_LOCAL: {
            file_reader.reset(new LocalFileReader(range.path, range.start_offset));
            break;
        }
        case TFileType::FILE_BROKER: {
            int64_t file_size = 0;
            // for compatibility
            if (range.__isset.file_size) {
                file_size = range.file_size;
            }
            file_reader.reset(new BufferedReader(
                    _profile,
                    new BrokerReader(_state->exec_env(), _broker_addresses, _params.properties,
                                     range.path, range.start_offset, file_size)));
            break;
        }
        case TFileType::FILE_S3: {
            file_reader.reset(new BufferedReader(
                    _profile, new S3Reader(_params.properties, range.path, range.start_offset)));
            break;
        }
        case TFileType::FILE_HDFS: {
#if defined(__x86_64__)
            file_reader.reset(
                    new HdfsFileReader(range.hdfs_params, range.path, range.start_offset));
            break;
#else
            return Status::InternalError("HdfsFileReader do not support on non x86 platform");
#endif
        }
        default: {
            std::stringstream ss;
            ss << "Unknown file type, type=" << range.file_type;
            return Status::InternalError(ss.str());
        }
        }
        RETURN_IF_ERROR(file_reader->open());
        if (file_reader->size() == 0) {
            file_reader->close();
            continue;
        }

        // ORCFileStream takes ownership of the released FileReader.
        std::unique_ptr<orc::InputStream> in_stream(
                new ORCFileStream(file_reader.release(), range.path));
        _reader = orc::createReader(std::move(in_stream), _options);

        _total_groups = _reader->getNumberOfStripes();
        _current_group = 0;
        _rows_of_group = 0;
        _current_line_of_group = 0;
        _row_reader = _reader->createRowReader(_row_reader_options);

        // include_cols is in loader column order, while the batch is in ORC order.
        _position_in_orc_original.clear();
        _position_in_orc_original.resize(_num_of_columns_from_file);
        const auto& include_cols = _row_reader_options.getIncludeNames();
        const auto& selected_type = _row_reader->getSelectedType();
        const uint64_t selected_count = selected_type.getSubtypeCount();
        int orc_index = 0;
        for (uint64_t i = 0; i < selected_count; ++i) {
            // Included columns must exist in the reader, otherwise
            // createRowReader throws. Still verify the reverse lookup so a
            // mismatch becomes an error Status instead of an uncaught
            // std::out_of_range.
            auto pos = std::find(include_cols.begin(), include_cols.end(),
                                 selected_type.getFieldName(i));
            const auto include_index =
                    static_cast<size_t>(std::distance(include_cols.begin(), pos));
            if (pos == include_cols.end() ||
                include_index >= _position_in_orc_original.size()) {
                std::stringstream ss;
                ss << "ORC column(" << selected_type.getFieldName(i)
                   << ") can not be mapped to a load column, file=" << range.path;
                LOG(WARNING) << ss.str();
                return Status::InternalError(ss.str());
            }
            _position_in_orc_original[include_index] = orc_index++;
        }
        return Status::OK();
    }
}
|
|
|
|
// Closes the base scanner, then drops the current row batch, the ORC reader
// and the ORC row reader, in that order.
void ORCScanner::close() {
    BaseScanner::close();
    _batch.reset();
    _reader.reset();
    _row_reader.reset();
}
|
|
|
|
} // namespace doris
|