[refactor](remove unused code) remove storage buffer and orc reader (#16137)
Remove the OLAP storage byte buffer, the ORC reader, the time operators, read_write_util, the aggregate functions, compress.h/.cpp, and bhp_lib. Co-authored-by: yiguolei <yiguolei@gmail.com>
This commit is contained in:
@ -33,7 +33,6 @@
|
||||
#include "exprs/math_functions.h"
|
||||
#include "exprs/quantile_function.h"
|
||||
#include "exprs/string_functions.h"
|
||||
#include "exprs/time_operators.h"
|
||||
#include "exprs/timestamp_functions.h"
|
||||
#include "exprs/topn_function.h"
|
||||
#include "exprs/utility_functions.h"
|
||||
@ -370,7 +369,6 @@ void Daemon::init(int argc, char** argv, const std::vector<StorePath>& paths) {
|
||||
MathFunctions::init();
|
||||
EncryptionFunctions::init();
|
||||
TimestampFunctions::init();
|
||||
TimeOperators::init();
|
||||
UtilityFunctions::init();
|
||||
JsonFunctions::init();
|
||||
GeoFunctions::init();
|
||||
|
||||
@ -23,7 +23,6 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/exec")
|
||||
|
||||
set(EXEC_FILES
|
||||
arrow/arrow_reader.cpp
|
||||
arrow/orc_reader.cpp
|
||||
arrow/parquet_reader.cpp
|
||||
base_scanner.cpp
|
||||
data_sink.cpp
|
||||
|
||||
@ -1,182 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
#include "exec/arrow/orc_reader.h"
|
||||
|
||||
#include <arrow/array.h>
|
||||
#include <arrow/status.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "common/logging.h"
|
||||
#include "io/file_reader.h"
|
||||
#include "runtime/mem_pool.h"
|
||||
#include "runtime/runtime_state.h"
|
||||
#include "runtime/tuple.h"
|
||||
#include "util/string_util.h"
|
||||
#include "vec/utils/arrow_column_to_doris_column.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Creates a wrapper that reads the part of an ORC file described by
// range_start_offset and range_size. No file is opened here: the ORC reader
// itself stays null until init_reader() is called.
ORCReaderWrap::ORCReaderWrap(RuntimeState* state,
                             const std::vector<SlotDescriptor*>& file_slot_descs,
                             FileReader* file_reader, int32_t num_of_columns_from_file,
                             int64_t range_start_offset, int64_t range_size, bool case_sensitive)
        : ArrowReaderWrap(state, file_slot_descs, file_reader, num_of_columns_from_file,
                          case_sensitive),
          _reader(nullptr),
          _cur_file_eof(false),
          _range_start_offset(range_start_offset),
          _range_size(range_size) {}
|
||||
|
||||
// Opens the ORC file behind _arrow_file and prepares this reader for scanning:
//   1. creates the Arrow ORC file reader and records the number of stripes,
//   2. seeks to the first stripe of this reader's range,
//   3. reads the schema and maps every field name to its ORC column index,
//   4. resolves the column indices and starts a thread running
//      ArrowReaderWrap::prefetch_batch.
// Returns EndOfFile when the file has no stripes and InternalError when the
// file reader cannot be created or the schema cannot be read.
// NOTE(review): tuple_desc and timezone are not used by this implementation.
Status ORCReaderWrap::init_reader(const TupleDescriptor* tuple_desc, const std::string& timezone) {
    auto maybe_reader =
            arrow::adapters::orc::ORCFileReader::Open(_arrow_file, arrow::default_memory_pool());
    if (!maybe_reader.ok()) {
        LOG(WARNING) << "failed to create orc file reader, errmsg=" << maybe_reader.status();
        return Status::InternalError("Failed to create orc file reader");
    }
    _reader = std::move(maybe_reader.ValueOrDie());
    _total_groups = _reader->NumberOfStripes();
    if (_total_groups == 0) {
        return Status::EndOfFile("Empty Orc File");
    }
    // Seeking uses stripe metadata, so it can only happen once _reader exists.
    RETURN_IF_ERROR(_seek_start_stripe());

    arrow::Result<std::shared_ptr<arrow::Schema>> maybe_schema = _reader->ReadSchema();
    if (!maybe_schema.ok()) {
        LOG(WARNING) << "failed to read schema, errmsg=" << maybe_schema.status();
        // Name the step that actually failed instead of repeating the
        // "create reader" message used above.
        return Status::InternalError("Failed to read orc file schema");
    }
    _schema = maybe_schema.ValueOrDie();

    // num_fields() returns int; keep the index signed to avoid a mixed
    // signed/unsigned comparison.
    const int num_fields = _schema->num_fields();
    for (int i = 0; i < num_fields; ++i) {
        const std::string& raw_name = _schema->field(i)->name();
        // ORC column indices start from 1.
        _map_column.emplace(_case_sensitive ? raw_name : to_lower(raw_name), i + 1);
    }
    RETURN_IF_ERROR(column_indices());

    _thread = std::thread(&ArrowReaderWrap::prefetch_batch, this);

    return Status::OK();
}
|
||||
|
||||
// Fills name_to_type with one entry per field of the file schema (field name ->
// Doris type) and copies this reader's missing-column names into missing_cols.
// Field names are lower-cased unless the reader is case sensitive. Stops and
// returns the failing status as soon as a field type cannot be converted.
Status ORCReaderWrap::get_columns(std::unordered_map<std::string, TypeDescriptor>* name_to_type,
                                  std::unordered_set<std::string>* missing_cols) {
    const int field_count = _schema->num_fields();
    for (int idx = 0; idx < field_count; ++idx) {
        const auto& field = _schema->field(idx);
        TypeDescriptor doris_type;
        RETURN_IF_ERROR(vectorized::arrow_type_to_doris_type(field->type()->id(), &doris_type));
        if (_case_sensitive) {
            name_to_type->emplace(field->name(), doris_type);
        } else {
            name_to_type->emplace(to_lower(field->name()), doris_type);
        }
    }

    missing_cols->insert(_missing_cols.begin(), _missing_cols.end());
    return Status::OK();
}
|
||||
|
||||
// Restricts this reader to the stripes whose file offsets fall inside
// [_range_start_offset, _range_start_offset + _range_size) and seeks the ORC
// reader to the first row of that range.
//
// Nothing is done when _range_size <= 0 or _range_start_offset == 0. Per the
// original notes: files from an HMS table start their range at offset 3 (after
// the magic word), while load jobs always pass 0, so file splitting is only
// supported for HMS tables.
// TODO: support file split for loading.
//
// On success _current_group is the first stripe to read and _total_groups is
// one past the last. Returns InternalError when the seek fails.
Status ORCReaderWrap::_seek_start_stripe() {
    if (_range_size <= 0 || _range_start_offset == 0) {
        return Status::OK();
    }

    const int64_t range_end_offset = _range_start_offset + _range_size;
    int64_t row_number = 0; // rows stored in the stripes that precede the range
    int start_group = _current_group;
    int end_group = _total_groups;
    for (int i = 0; i < _total_groups; i++) {
        // Fetch the stripe metadata once per stripe instead of once per field.
        const auto stripe = _reader->GetRawORCReader()->getStripe(i);
        const int64_t stripe_offset = stripe->getOffset();
        if (stripe_offset < _range_start_offset) {
            row_number += static_cast<int64_t>(stripe->getNumberOfRows());
        } else if (stripe_offset == _range_start_offset) {
            // If using the external file scan, _range_start_offset is always in the offset lists.
            // If using broker load, _range_start_offset is always set to be 0.
            start_group = i;
        }
        if (range_end_offset <= stripe_offset) {
            // First stripe at or beyond the end of the range: stop before it.
            end_group = i;
            break;
        }
    }

    LOG(INFO) << "This reader read orc file from offset: " << _range_start_offset
              << " with size: " << _range_size << ". Also mean that read from strip id from "
              << start_group << " to " << end_group;

    if (!_reader->Seek(row_number).ok()) {
        LOG(WARNING) << "Failed to seek to the line number: " << row_number;
        return Status::InternalError("Failed to seek to the line number");
    }

    _current_group = start_group;
    _total_groups = end_group;

    return Status::OK();
}
|
||||
|
||||
// Advances to the next stripe of this reader's range.
// Sets *eof to true (and returns OK) once every stripe has been consumed;
// *eof is left untouched otherwise. On success _rb_reader iterates over the
// record batches of the new stripe and _current_group is incremented.
Status ORCReaderWrap::_next_stripe_reader(bool* eof) {
    const bool exhausted = _current_group >= _total_groups;
    if (exhausted) {
        *eof = true;
        return Status::OK();
    }

    // NextStripeReader yields a stripe-level batch iterator whose batches hold
    // at most batch_size rows. It is the fine-grained alternative to
    // ReadStripe, which loads a whole stripe at once and may run out of
    // memory. Only rows of the current stripe are covered, not the whole file.
    arrow::Result<std::shared_ptr<arrow::RecordBatchReader>> stripe_reader =
            _reader->NextStripeReader(_state->batch_size(), _include_cols);
    if (stripe_reader.ok()) {
        _rb_reader = stripe_reader.ValueOrDie();
        _current_group++;
        return Status::OK();
    }

    LOG(WARNING) << "Get RecordBatch Failed. " << stripe_reader.status();
    return Status::InternalError(stripe_reader.status().ToString());
}
|
||||
|
||||
// Reads every record batch of the next stripe into batches and stores the
// result of that read in _status.
// When the next stripe cannot be opened, or no stripe is left, the reader is
// marked closed and batches is left untouched.
// NOTE(review): current_group is not used; the stripe is chosen by
// _next_stripe_reader().
void ORCReaderWrap::read_batches(arrow::RecordBatchVector& batches, int current_group) {
    bool reached_end = false;
    const Status open_status = _next_stripe_reader(&reached_end);
    if (!open_status.ok() || reached_end) {
        _closed = true;
        return;
    }

    _status = _rb_reader->ReadAll(&batches);
}
|
||||
|
||||
// Always returns false, whatever current_group is.
// NOTE(review): presumably false means "do not skip this group" -- confirm
// against ArrowReaderWrap.
bool ORCReaderWrap::filter_row_group(int current_group) {
    return false;
}
|
||||
|
||||
} // namespace doris
|
||||
@ -1,61 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <arrow/adapters/orc/adapter.h>
|
||||
#include <arrow/api.h>
|
||||
#include <arrow/buffer.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
#include "common/status.h"
|
||||
#include "exec/arrow/arrow_reader.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Reader of ORC file
|
||||
class ORCReaderWrap final : public ArrowReaderWrap {
|
||||
public:
|
||||
ORCReaderWrap(RuntimeState* state, const std::vector<SlotDescriptor*>& file_slot_descs,
|
||||
FileReader* file_reader, int32_t num_of_columns_from_file,
|
||||
int64_t range_start_offset, int64_t range_size, bool case_sensitive = true);
|
||||
~ORCReaderWrap() override = default;
|
||||
|
||||
Status init_reader(const TupleDescriptor* tuple_desc, const std::string& timezone) override;
|
||||
|
||||
Status get_columns(std::unordered_map<std::string, TypeDescriptor>* name_to_type,
|
||||
std::unordered_set<std::string>* missing_cols) override;
|
||||
|
||||
private:
|
||||
Status _next_stripe_reader(bool* eof);
|
||||
Status _seek_start_stripe();
|
||||
void read_batches(arrow::RecordBatchVector& batches, int current_group) override;
|
||||
bool filter_row_group(int current_group) override;
|
||||
|
||||
private:
|
||||
// orc file reader object
|
||||
std::unique_ptr<arrow::adapters::orc::ORCFileReader> _reader;
|
||||
std::shared_ptr<arrow::Schema> _schema;
|
||||
bool _cur_file_eof; // is read over?
|
||||
int64_t _range_start_offset;
|
||||
int64_t _range_size;
|
||||
};
|
||||
|
||||
} // namespace doris
|
||||
@ -1,72 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "exec/read_write_util.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Writes `integer` into buf as a zigzag-encoded varint and returns the number
// of bytes written.
// Zigzag moves the sign into the lowest bit; the result is then emitted in
// 7-bit groups, least significant group first, with bit 0x80 set on every byte
// except the last. A 32-bit value needs at most 5 bytes, so buf must have room
// for MAX_ZINT_LEN bytes.
int ReadWriteUtil::put_zint(int32_t integer, uint8_t* buf) {
    // Do the encoding in unsigned arithmetic: left-shifting a negative signed
    // value is undefined behaviour before C++20.
    const uint32_t sign_fill = integer < 0 ? ~uint32_t {0} : uint32_t {0};
    uint32_t zigzag = (static_cast<uint32_t>(integer) << 1) ^ sign_fill;

    int len = 0;
    do {
        uint8_t group = static_cast<uint8_t>(zigzag & 0x7f);
        zigzag >>= 7;
        if (zigzag != 0) {
            // More groups follow: set the continuation bit.
            group |= 0x80;
        }
        buf[len++] = group;
    } while (zigzag != 0);

    return len;
}
|
||||
|
||||
// Writes `longint` into buf as a zigzag-encoded varint and returns the number
// of bytes written.
// Zigzag moves the sign into the lowest bit; the result is then emitted in
// 7-bit groups, least significant group first, with bit 0x80 set on every byte
// except the last. A 64-bit value needs at most 10 bytes, so buf must have
// room for MAX_ZLONG_LEN bytes.
int ReadWriteUtil::put_zlong(int64_t longint, uint8_t* buf) {
    // Do the encoding in unsigned arithmetic: left-shifting a negative signed
    // value is undefined behaviour before C++20.
    const uint64_t sign_fill = longint < 0 ? ~uint64_t {0} : uint64_t {0};
    uint64_t zigzag = (static_cast<uint64_t>(longint) << 1) ^ sign_fill;

    int len = 0;
    do {
        uint8_t group = static_cast<uint8_t>(zigzag & 0x7f);
        zigzag >>= 7;
        if (zigzag != 0) {
            // More groups follow: set the continuation bit.
            group |= 0x80;
        }
        buf[len++] = group;
    } while (zigzag != 0);

    return len;
}
|
||||
|
||||
// Formats the first `length` bytes of buf as hexadecimal numbers.
// Each byte is printed without zero padding and followed by a single space
// (so the result ends with a space). Returns an empty string when
// length <= 0.
std::string ReadWriteUtil::hex_dump(const uint8_t* buf, int64_t length) {
    std::stringstream ss;
    ss << std::hex;

    // The index has the same type as length; an int index would overflow for
    // lengths above INT_MAX.
    for (int64_t i = 0; i < length; ++i) {
        // Cast so the byte is printed as a number, not as a character.
        ss << static_cast<int>(buf[i]) << " ";
    }

    return ss.str();
}
|
||||
|
||||
std::string ReadWriteUtil::hex_dump(const char* buf, int64_t length) {
|
||||
return hex_dump(reinterpret_cast<const uint8_t*>(buf), length);
|
||||
}
|
||||
} // namespace doris
|
||||
@ -1,229 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
#include <sstream>

#include "common/status.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Makes the enclosing function return false when x evaluates to false.
// The do { } while (false) wrapper turns the expansion into a single
// statement, so the macro cannot capture a following `else` and behaves
// correctly inside unbraced if/else bodies.
#define RETURN_IF_FALSE(x)                \
    do {                                  \
        if (UNLIKELY(!(x))) return false; \
    } while (false)
|
||||
|
||||
// Class for reading and writing various data types.
|
||||
class ReadWriteUtil {
|
||||
public:
|
||||
// Maximum length for Writeable VInt
|
||||
static const int MAX_VINT_LEN = 9;
|
||||
|
||||
// Maximum lengths for Zigzag encodings.
|
||||
const static int MAX_ZINT_LEN = 5;
|
||||
const static int MAX_ZLONG_LEN = 10;
|
||||
|
||||
// Put a zigzag encoded integer into a buffer and return its length.
|
||||
static int put_zint(int32_t integer, uint8_t* buf);
|
||||
|
||||
// Put a zigzag encoded long integer into a buffer and return its length.
|
||||
static int put_zlong(int64_t longint, uint8_t* buf);
|
||||
|
||||
// Get a big endian integer from a buffer. The buffer does not have to be word aligned.
|
||||
static int32_t get_int(const uint8_t* buffer);
|
||||
static int16_t get_small_int(const uint8_t* buffer);
|
||||
static int64_t get_long_int(const uint8_t* buffer);
|
||||
|
||||
// Get a variable-length Long or int value from a byte buffer.
|
||||
// Returns the length of the long/int
|
||||
// If the size byte is corrupted then return -1;
|
||||
static int get_vlong(uint8_t* buf, int64_t* vlong);
|
||||
static int get_vint(uint8_t* buf, int32_t* vint);
|
||||
|
||||
// Read a variable-length Long value from a byte buffer starting at the specified
|
||||
// byte offset.
|
||||
static int get_vlong(uint8_t* buf, int64_t offset, int64_t* vlong);
|
||||
|
||||
// Put an Integer into a buffer in big endian order . The buffer must be at least
|
||||
// 4 bytes long.
|
||||
static void put_int(uint8_t* buf, int32_t integer);
|
||||
|
||||
// Dump the first length bytes of buf to a Hex string.
|
||||
static std::string hex_dump(const uint8_t* buf, int64_t length);
|
||||
static std::string hex_dump(const char* buf, int64_t length);
|
||||
|
||||
// Determines the sign of a VInt/VLong from the first byte.
|
||||
static bool is_negative_vint(int8_t byte);
|
||||
|
||||
// Determines the total length in bytes of a Writable VInt/VLong from the first byte.
|
||||
static int decode_vint_size(int8_t byte);
|
||||
|
||||
// The following methods read data from a buffer without assuming the buffer is long
|
||||
// enough. If the buffer isn't long enough or another error occurs, they return false
|
||||
// and update the status with the error. Otherwise they return true. buffer is advanced
|
||||
// past the data read and buf_len is decremented appropriately.
|
||||
|
||||
// Read a zig-zag encoded long. This is the integer encoding defined by google.com
|
||||
// protocol-buffers: https://developers.google.com/protocol-buffers/docs/encoding
|
||||
static bool read_zlong(uint8_t** buf, int* buf_len, int64_t* val, Status* status);
|
||||
|
||||
// Read a zig-zag encoded int.
|
||||
static bool read_zint(uint8_t** buf, int* buf_len, int32_t* val, Status* status);
|
||||
|
||||
// Read a native type T (e.g. bool, float) directly into output (i.e. input is cast
|
||||
// directly to T and incremented by sizeof(T)).
|
||||
template <class T>
|
||||
static bool read(uint8_t** buf, int* buf_len, T* val, Status* status);
|
||||
|
||||
// Skip the next num_bytes bytes.
|
||||
static bool skip_bytes(uint8_t** buf, int* buf_len, int num_bytes, Status* status);
|
||||
};
|
||||
|
||||
// Reads a big-endian 16-bit integer from the first two bytes of buf.
inline int16_t ReadWriteUtil::get_small_int(const uint8_t* buf) {
    const int high = buf[0] << 8;
    const int low = buf[1];
    return high | low;
}
|
||||
|
||||
// Reads a big-endian 32-bit integer from the first four bytes of buf.
inline int32_t ReadWriteUtil::get_int(const uint8_t* buf) {
    const int32_t byte0 = buf[0] << 24; // most significant byte comes first
    const int32_t byte1 = buf[1] << 16;
    const int32_t byte2 = buf[2] << 8;
    const int32_t byte3 = buf[3];
    return byte0 | byte1 | byte2 | byte3;
}
|
||||
|
||||
// Reads a big-endian 64-bit integer from the first eight bytes of buf.
inline int64_t ReadWriteUtil::get_long_int(const uint8_t* buf) {
    // Accumulate in an unsigned 64-bit value. Shifting buf[4] as a plain int
    // (`buf[4] << 24`) goes negative when the byte is >= 0x80 and then
    // sign-extends, overwriting the upper 32 bits of the result.
    uint64_t value = 0;
    for (int i = 0; i < 8; ++i) {
        value = (value << 8) | buf[i];
    }
    return static_cast<int64_t>(value);
}
|
||||
|
||||
// Stores integer into buf[0..3] in big-endian order (most significant byte
// first). Each store keeps only the low 8 bits of the shifted value.
inline void ReadWriteUtil::put_int(uint8_t* buf, int32_t integer) {
    int shift = 24;
    for (int i = 0; i < 4; ++i, shift -= 8) {
        buf[i] = integer >> shift;
    }
}
|
||||
|
||||
inline int ReadWriteUtil::get_vint(uint8_t* buf, int32_t* vint) {
|
||||
int64_t vlong = 0;
|
||||
int len = get_vlong(buf, &vlong);
|
||||
*vint = static_cast<int32_t>(vlong);
|
||||
return len;
|
||||
}
|
||||
|
||||
inline int ReadWriteUtil::get_vlong(uint8_t* buf, int64_t* vlong) {
|
||||
return get_vlong(buf, 0, vlong);
|
||||
}
|
||||
|
||||
inline int ReadWriteUtil::get_vlong(uint8_t* buf, int64_t offset, int64_t* vlong) {
|
||||
int8_t firstbyte = (int8_t)buf[0 + offset];
|
||||
|
||||
int len = decode_vint_size(firstbyte);
|
||||
|
||||
if (len > MAX_VINT_LEN) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (len == 1) {
|
||||
*vlong = static_cast<int64_t>(firstbyte);
|
||||
return len;
|
||||
}
|
||||
|
||||
*vlong &= ~*vlong;
|
||||
|
||||
for (int i = 1; i < len; i++) {
|
||||
*vlong = (*vlong << 8) | buf[i + offset];
|
||||
}
|
||||
|
||||
if (is_negative_vint(firstbyte)) {
|
||||
*vlong = *vlong ^ ((int64_t)-1);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
inline bool ReadWriteUtil::read_zint(uint8_t** buf, int* buf_len, int32_t* val, Status* status) {
|
||||
int64_t zlong;
|
||||
RETURN_IF_FALSE(read_zlong(buf, buf_len, &zlong, status));
|
||||
*val = static_cast<int32_t>(zlong);
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool ReadWriteUtil::read_zlong(uint8_t** buf, int* buf_len, int64_t* val, Status* status) {
|
||||
uint64_t zlong = 0;
|
||||
int shift = 0;
|
||||
bool more;
|
||||
|
||||
do {
|
||||
DCHECK_LE(shift, 64);
|
||||
|
||||
if (UNLIKELY(*buf_len < 1)) {
|
||||
*status = Status::InternalError("Insufficient buffer length");
|
||||
return false;
|
||||
}
|
||||
|
||||
zlong |= static_cast<uint64_t>(**buf & 0x7f) << shift;
|
||||
shift += 7;
|
||||
more = (**buf & 0x80) != 0;
|
||||
++(*buf);
|
||||
--(*buf_len);
|
||||
} while (more);
|
||||
|
||||
*val = (zlong >> 1) ^ -(zlong & 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline bool ReadWriteUtil::read(uint8_t** buf, int* buf_len, T* val, Status* status) {
|
||||
int val_len = sizeof(T);
|
||||
|
||||
if (UNLIKELY(val_len > *buf_len)) {
|
||||
*status = Status::InternalError("Cannot read {} bytes, buffer length is {}", val_len,
|
||||
*buf_len);
|
||||
return false;
|
||||
}
|
||||
|
||||
*val = *reinterpret_cast<T*>(*buf);
|
||||
*buf += val_len;
|
||||
*buf_len -= val_len;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool ReadWriteUtil::skip_bytes(uint8_t** buf, int* buf_len, int num_bytes, Status* status) {
|
||||
DCHECK_GE(*buf_len, 0);
|
||||
|
||||
if (UNLIKELY(num_bytes > *buf_len)) {
|
||||
*status = Status::InternalError("Cannot skip {} bytes, buffer length is {}", num_bytes,
|
||||
*buf_len);
|
||||
return false;
|
||||
}
|
||||
|
||||
*buf += num_bytes;
|
||||
*buf_len -= num_bytes;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Tells from the first byte of a VInt/VLong whether the encoded value is
// negative: true for bytes below -120 and for bytes in [-112, -1].
inline bool ReadWriteUtil::is_negative_vint(int8_t byte) {
    if (byte < -120) {
        return true;
    }
    return byte >= -112 && byte < 0;
}
|
||||
|
||||
// Returns the total encoded length, in bytes, of a Writable VInt/VLong given
// its first byte:
//   byte >= -112         -> 1 (the byte is the value itself)
//   -120 <= byte < -112  -> -111 - byte
//   byte < -120          -> -119 - byte
inline int ReadWriteUtil::decode_vint_size(int8_t byte) {
    if (byte >= -112) {
        return 1;
    }
    const int base = (byte < -120) ? -119 : -111;
    return base - byte;
}
|
||||
|
||||
} // namespace doris
|
||||
@ -23,10 +23,8 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/exprs")
|
||||
|
||||
add_library(Exprs
|
||||
encryption_functions.cpp
|
||||
aggregate_functions.cpp
|
||||
anyval_util.cpp
|
||||
cast_functions.cpp
|
||||
time_operators.cpp
|
||||
hash_functions.cpp
|
||||
block_bloom_filter_avx_impl.cc
|
||||
block_bloom_filter_impl.cc
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,422 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
// This file is copied from
|
||||
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/aggregate-functions.h
|
||||
// and modified by Doris
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "udf/udf.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
class HllSetResolver;
|
||||
class HybridSetBase;
|
||||
|
||||
// Collection of builtin aggregate functions. Aggregate functions implement
|
||||
// the various phases of the aggregation: Init(), Update(), Serialize(), Merge(),
|
||||
// and Finalize(). Not all functions need to implement all of the steps and
|
||||
// some of the parts can be reused across different aggregate functions.
|
||||
// These functions are implemented using the UDA interface.
|
||||
|
||||
class AggregateFunctions {
|
||||
public:
|
||||
// Initializes dst to nullptr.
|
||||
static void init_null(doris_udf::FunctionContext*, doris_udf::AnyVal* dst);
|
||||
// Initializes dst to nullptr and sets dst->ptr to nullptr.
|
||||
static void init_null_string(doris_udf::FunctionContext* c, doris_udf::StringVal* dst);
|
||||
|
||||
// Initializes dst to 0 and is_null = true.
|
||||
template <typename T>
|
||||
static void init_zero(doris_udf::FunctionContext*, T* dst);
|
||||
|
||||
// Initializes dst to 0 and is_null = true.
|
||||
template <typename T>
|
||||
static void init_zero_null(doris_udf::FunctionContext*, T* dst);
|
||||
|
||||
// Initializes dst to 0.
|
||||
template <typename T>
|
||||
static void init_zero_not_null(doris_udf::FunctionContext*, T* dst);
|
||||
|
||||
template <typename SRC_VAL, typename DST_VAL>
|
||||
static void sum_remove(doris_udf::FunctionContext* ctx, const SRC_VAL& src, DST_VAL* dst);
|
||||
|
||||
// doris_udf::StringVal GetValue() function that returns a copy of src
|
||||
static doris_udf::StringVal string_val_get_value(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::StringVal& src);
|
||||
static doris_udf::StringVal string_val_serialize_or_finalize(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::StringVal& src);
|
||||
|
||||
// Implementation of Count and Count(*)
|
||||
static void count_update(doris_udf::FunctionContext*, const doris_udf::AnyVal& src,
|
||||
doris_udf::BigIntVal* dst);
|
||||
static void count_merge(doris_udf::FunctionContext*, const doris_udf::BigIntVal& src,
|
||||
doris_udf::BigIntVal* dst);
|
||||
static void count_remove(doris_udf::FunctionContext*, const doris_udf::AnyVal& src,
|
||||
doris_udf::BigIntVal* dst);
|
||||
static void count_star_update(doris_udf::FunctionContext*, doris_udf::BigIntVal* dst);
|
||||
|
||||
static void count_star_remove(FunctionContext*, BigIntVal* dst);
|
||||
|
||||
// Implementation of percentile
|
||||
static void percentile_init(FunctionContext* ctx, StringVal* dst);
|
||||
|
||||
template <typename T>
|
||||
static void percentile_update(FunctionContext* ctx, const T& src, const DoubleVal& quantile,
|
||||
StringVal* dst);
|
||||
|
||||
static void percentile_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst);
|
||||
|
||||
static StringVal percentile_serialize(FunctionContext* ctx, const StringVal& state_sv);
|
||||
|
||||
static DoubleVal percentile_finalize(FunctionContext* ctx, const StringVal& src);
|
||||
|
||||
// Implementation of percentile_approx
|
||||
static void percentile_approx_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst);
|
||||
|
||||
template <typename T>
|
||||
static void percentile_approx_update(FunctionContext* ctx, const T& src,
|
||||
const DoubleVal& quantile, StringVal* dst);
|
||||
|
||||
template <typename T>
|
||||
static void percentile_approx_update(FunctionContext* ctx, const T& src,
|
||||
const DoubleVal& quantile,
|
||||
const DoubleVal& digest_compression, StringVal* dst);
|
||||
|
||||
static void percentile_approx_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst);
|
||||
|
||||
static DoubleVal percentile_approx_finalize(FunctionContext* ctx, const StringVal& src);
|
||||
|
||||
static StringVal percentile_approx_serialize(FunctionContext* ctx, const StringVal& state_sv);
|
||||
|
||||
// Implementation of Avg.
|
||||
// TODO: Change this to use a fixed-sized BufferVal as intermediate type.
|
||||
static void avg_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst);
|
||||
template <typename T>
|
||||
static void avg_update(doris_udf::FunctionContext* ctx, const T& src,
|
||||
doris_udf::StringVal* dst);
|
||||
template <typename T>
|
||||
static void avg_remove(doris_udf::FunctionContext* ctx, const T& src,
|
||||
doris_udf::StringVal* dst);
|
||||
static void avg_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst);
|
||||
static doris_udf::DoubleVal avg_get_value(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::StringVal& val);
|
||||
static doris_udf::DoubleVal avg_finalize(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::StringVal& val);
|
||||
|
||||
// Avg for timestamp. Uses avg_init() and AvgMerge().
|
||||
static void timestamp_avg_update(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::DateTimeVal& src, doris_udf::StringVal* dst);
|
||||
static void timestamp_avg_remove(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::DateTimeVal& src, doris_udf::StringVal* dst);
|
||||
static doris_udf::DateTimeVal timestamp_avg_get_value(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::StringVal& val);
|
||||
static doris_udf::DateTimeVal timestamp_avg_finalize(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::StringVal& val);
|
||||
|
||||
// Avg for decimals.
|
||||
static void decimalv2_avg_init(doris_udf::FunctionContext* ctx, doris_udf::StringVal* dst);
|
||||
static void decimalv2_avg_update(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::DecimalV2Val& src, doris_udf::StringVal* dst);
|
||||
static void decimalv2_avg_merge(FunctionContext* ctx, const doris_udf::StringVal& src,
|
||||
doris_udf::StringVal* dst);
|
||||
static doris_udf::StringVal decimalv2_avg_serialize(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::StringVal& src);
|
||||
static void decimalv2_avg_remove(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::DecimalV2Val& src, doris_udf::StringVal* dst);
|
||||
|
||||
static doris_udf::DecimalV2Val decimalv2_avg_get_value(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::StringVal& val);
|
||||
static doris_udf::DecimalV2Val decimalv2_avg_finalize(doris_udf::FunctionContext* ctx,
|
||||
const doris_udf::StringVal& val);
|
||||
// SumUpdate, SumMerge
|
||||
template <typename SRC_VAL, typename DST_VAL>
|
||||
static void sum(doris_udf::FunctionContext*, const SRC_VAL& src, DST_VAL* dst);
|
||||
|
||||
// MinInit
|
||||
template <typename T>
|
||||
static void min_init(doris_udf::FunctionContext*, T* dst);
|
||||
|
||||
// MinUpdate/MinMerge
|
||||
template <typename T>
|
||||
static void min(doris_udf::FunctionContext*, const T& src, T* dst);
|
||||
|
||||
// MaxInit
|
||||
template <typename T>
|
||||
static void max_init(doris_udf::FunctionContext*, T* dst);
|
||||
|
||||
// MaxUpdate/MaxMerge
|
||||
template <typename T>
|
||||
static void max(doris_udf::FunctionContext*, const T& src, T* dst);
|
||||
|
||||
// AnyInit
|
||||
template <typename T>
|
||||
static void any_init(doris_udf::FunctionContext*, T* dst);
|
||||
|
||||
// AnyUpdate/AnyMerge
|
||||
template <typename T>
|
||||
static void any(doris_udf::FunctionContext*, const T& src, T* dst);
|
||||
|
||||
// String concat
|
||||
static void string_concat(doris_udf::FunctionContext*, const doris_udf::StringVal& src,
|
||||
const doris_udf::StringVal& separator, doris_udf::StringVal* result);
|
||||
|
||||
/// String concat
|
||||
static void string_concat_update(FunctionContext*, const StringVal& src, StringVal* result);
|
||||
static void string_concat_update(FunctionContext*, const StringVal& src,
|
||||
const StringVal& separator, StringVal* result);
|
||||
static void string_concat_merge(FunctionContext*, const StringVal& src, StringVal* result);
|
||||
static StringVal string_concat_finalize(FunctionContext*, const StringVal& src);
|
||||
|
||||
// Probabilistic Counting (PC), a distinct estimate algorithms.
|
||||
// Probabilistic Counting with Stochastic Averaging (PCSA) is a variant
|
||||
// of PC that runs faster and usually gets equally accurate results.
|
||||
static void pc_init(doris_udf::FunctionContext*, doris_udf::StringVal* slot);
|
||||
|
||||
template <typename T>
|
||||
static void pc_update(doris_udf::FunctionContext*, const T& src, doris_udf::StringVal* dst);
|
||||
template <typename T>
|
||||
static void pcsa_update(doris_udf::FunctionContext*, const T& src, doris_udf::StringVal* dst);
|
||||
|
||||
static void pc_merge(doris_udf::FunctionContext*, const doris_udf::StringVal& src,
|
||||
doris_udf::StringVal* dst);
|
||||
|
||||
static doris_udf::StringVal pc_finalize(doris_udf::FunctionContext*,
|
||||
const doris_udf::StringVal& src);
|
||||
|
||||
static doris_udf::StringVal pcsa_finalize(doris_udf::FunctionContext*,
|
||||
const doris_udf::StringVal& src);
|
||||
|
||||
// count and sum distinct algorithm in multi distinct
|
||||
template <typename T>
|
||||
static void count_or_sum_distinct_numeric_init(doris_udf::FunctionContext* ctx,
|
||||
doris_udf::StringVal* dst);
|
||||
template <typename T>
|
||||
static void count_or_sum_distinct_numeric_update(FunctionContext* ctx, T& src, StringVal* dst);
|
||||
template <typename T>
|
||||
static void count_or_sum_distinct_numeric_merge(FunctionContext* ctx, StringVal& src,
|
||||
StringVal* dst);
|
||||
template <typename T>
|
||||
static StringVal count_or_sum_distinct_numeric_serialize(FunctionContext* ctx,
|
||||
const StringVal& state_sv);
|
||||
template <typename T>
|
||||
static BigIntVal count_or_sum_distinct_numeric_finalize(FunctionContext* ctx,
|
||||
const StringVal& state_sv);
|
||||
|
||||
// count distinct in multi distinct for string
|
||||
static void count_distinct_string_init(doris_udf::FunctionContext* ctx,
|
||||
doris_udf::StringVal* dst);
|
||||
static void count_distinct_string_update(FunctionContext* ctx, StringVal& src, StringVal* dst);
|
||||
static void count_distinct_string_merge(FunctionContext* ctx, StringVal& src, StringVal* dst);
|
||||
static StringVal count_distinct_string_serialize(FunctionContext* ctx,
|
||||
const StringVal& state_sv);
|
||||
static BigIntVal count_distinct_string_finalize(FunctionContext* ctx,
|
||||
const StringVal& state_sv);
|
||||
|
||||
// count distinct in multi distinct for decimal
|
||||
static void count_or_sum_distinct_decimal_init(doris_udf::FunctionContext* ctx,
|
||||
doris_udf::StringVal* dst);
|
||||
static void count_or_sum_distinct_decimalv2_init(doris_udf::FunctionContext* ctx,
|
||||
doris_udf::StringVal* dst);
|
||||
static void count_or_sum_distinct_decimalv2_update(FunctionContext* ctx, DecimalV2Val& src,
|
||||
StringVal* dst);
|
||||
static void count_or_sum_distinct_decimal_merge(FunctionContext* ctx, StringVal& src,
|
||||
StringVal* dst);
|
||||
static void count_or_sum_distinct_decimalv2_merge(FunctionContext* ctx, StringVal& src,
|
||||
StringVal* dst);
|
||||
|
||||
static StringVal count_or_sum_distinct_decimalv2_serialize(FunctionContext* ctx,
|
||||
const StringVal& state_sv);
|
||||
|
||||
static BigIntVal count_distinct_decimalv2_finalize(FunctionContext* ctx,
|
||||
const StringVal& state_sv);
|
||||
static DecimalV2Val sum_distinct_decimalv2_finalize(FunctionContext* ctx,
|
||||
const StringVal& state_sv);
|
||||
|
||||
// count distinct in multi distinct for Date
|
||||
static void count_distinct_date_init(doris_udf::FunctionContext* ctx,
|
||||
doris_udf::StringVal* dst);
|
||||
static void count_distinct_date_update(FunctionContext* ctx, DateTimeVal& src, StringVal* dst);
|
||||
static void count_distinct_date_merge(FunctionContext* ctx, StringVal& src, StringVal* dst);
|
||||
static StringVal count_distinct_date_serialize(FunctionContext* ctx, const StringVal& state_sv);
|
||||
static BigIntVal count_distinct_date_finalize(FunctionContext* ctx, const StringVal& state_sv);
|
||||
|
||||
template <typename T>
|
||||
static BigIntVal sum_distinct_bigint_finalize(FunctionContext* ctx, const StringVal& state_sv);
|
||||
template <typename T>
|
||||
static LargeIntVal sum_distinct_largeint_finalize(FunctionContext* ctx,
|
||||
const StringVal& state_sv);
|
||||
template <typename T>
|
||||
static DoubleVal sum_distinct_double_finalize(FunctionContext* ctx, const StringVal& state_sv);
|
||||
|
||||
/// Knuth's variance algorithm, more numerically stable than canonical stddev
|
||||
/// algorithms; reference implementation:
|
||||
/// http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
|
||||
static void knuth_var_init(FunctionContext* context, StringVal* val);
|
||||
template <typename T>
|
||||
static void knuth_var_update(FunctionContext* context, const T& input, StringVal* val);
|
||||
template <typename T>
|
||||
static void knuth_var_remove(FunctionContext* context, const T& src, StringVal* dst);
|
||||
static void knuth_var_merge(FunctionContext* context, const StringVal& src, StringVal* dst);
|
||||
static DoubleVal knuth_var_finalize(FunctionContext* context, const StringVal& val);
|
||||
|
||||
/// Calculates the biased variance, uses KnuthVar Init-Update-Merge functions
|
||||
static DoubleVal knuth_var_pop_finalize(FunctionContext* context, const StringVal& val);
|
||||
|
||||
/// Calculates STDDEV, uses KnuthVar Init-Update-Merge functions
|
||||
static DoubleVal knuth_stddev_finalize(FunctionContext* context, const StringVal& val);
|
||||
|
||||
/// Calculates the biased STDDEV, uses KnuthVar Init-Update-Merge functions
|
||||
static DoubleVal knuth_stddev_pop_finalize(FunctionContext* context, const StringVal& val);
|
||||
|
||||
static DoubleVal knuth_var_get_value(FunctionContext* ctx, const StringVal& state_sv);
|
||||
static DoubleVal knuth_var_pop_get_value(FunctionContext* context, const StringVal& val);
|
||||
static DoubleVal knuth_stddev_get_value(FunctionContext* ctx, const StringVal& state_sv);
|
||||
static DoubleVal knuth_stddev_pop_get_value(FunctionContext* context, const StringVal& val);
|
||||
|
||||
// variance/stddev for decimals.
|
||||
static void decimalv2_knuth_var_init(FunctionContext* context, StringVal* val);
|
||||
static void knuth_var_remove(FunctionContext* ctx, const DecimalV2Val& src, StringVal* dst);
|
||||
static void knuth_var_update(FunctionContext* context, const DecimalV2Val& src, StringVal* val);
|
||||
static void decimalv2_knuth_var_merge(FunctionContext* context, const StringVal& src,
|
||||
StringVal* val);
|
||||
static DecimalV2Val decimalv2_knuth_var_finalize(FunctionContext* context,
|
||||
const StringVal& val);
|
||||
static DecimalV2Val decimalv2_knuth_var_pop_finalize(FunctionContext* context,
|
||||
const StringVal& val);
|
||||
static DecimalV2Val decimalv2_knuth_stddev_finalize(FunctionContext* context,
|
||||
const StringVal& val);
|
||||
static DecimalV2Val decimalv2_knuth_stddev_pop_finalize(FunctionContext* context,
|
||||
const StringVal& val);
|
||||
|
||||
static DecimalV2Val decimalv2_knuth_var_get_value(FunctionContext* ctx,
|
||||
const StringVal& state_sv);
|
||||
static DecimalV2Val decimalv2_knuth_var_pop_get_value(FunctionContext* context,
|
||||
const StringVal& val);
|
||||
static DecimalV2Val decimalv2_knuth_stddev_get_value(FunctionContext* context,
|
||||
const StringVal& val);
|
||||
static DecimalV2Val decimalv2_knuth_stddev_pop_get_value(FunctionContext* context,
|
||||
const StringVal& val);
|
||||
|
||||
/// ----------------------------- Analytic Functions ---------------------------------
|
||||
/// Analytic functions implement the UDA interface (except Merge(), Serialize()) and are
|
||||
/// used internally by the AnalyticEvalNode. Some analytic functions store intermediate
|
||||
/// state as a StringVal which is needed for multiple calls to Finalize(), so some fns
|
||||
/// also implement a (private) GetValue() method to just return the value. In that
|
||||
/// case, Finalize() is only called at the end to clean up.
|
||||
|
||||
// Initializes the state for RANK and DENSE_RANK
|
||||
static void rank_init(doris_udf::FunctionContext*, doris_udf::StringVal* slot);
|
||||
|
||||
// Update state for RANK
|
||||
static void rank_update(doris_udf::FunctionContext*, doris_udf::StringVal* dst);
|
||||
|
||||
// Update state for DENSE_RANK
|
||||
static void dense_rank_update(doris_udf::FunctionContext*, doris_udf::StringVal* dst);
|
||||
|
||||
// Returns the result for RANK and prepares the state for the next Update().
|
||||
static doris_udf::BigIntVal rank_get_value(doris_udf::FunctionContext*,
|
||||
doris_udf::StringVal& src);
|
||||
|
||||
// Returns the result for DENSE_RANK and prepares the state for the next Update().
|
||||
// TODO: Implement DENSE_RANK with a single doris_udf::BigIntVal. Requires src can be modified,
|
||||
// AggFnEvaluator would need to handle copying the src doris_udf::AnyVal back into the src slot.
|
||||
static doris_udf::BigIntVal dense_rank_get_value(doris_udf::FunctionContext*,
|
||||
doris_udf::StringVal& src);
|
||||
|
||||
// Returns the result for RANK and DENSE_RANK and cleans up intermediate state in src.
|
||||
static doris_udf::BigIntVal rank_finalize(doris_udf::FunctionContext*,
|
||||
doris_udf::StringVal& src);
|
||||
|
||||
// Implements LAST_VALUE.
|
||||
template <typename T>
|
||||
static void last_val_update(doris_udf::FunctionContext*, const T& src, T* dst);
|
||||
template <typename T>
|
||||
static void last_val_remove(doris_udf::FunctionContext*, const T& src, T* dst);
|
||||
|
||||
// Implements FIRST_VALUE.
|
||||
template <typename T>
|
||||
static void first_val_update(doris_udf::FunctionContext*, const T& src, T* dst);
|
||||
// Implements FIRST_VALUE for some windows that require rewrites during planning.
|
||||
// The doris_udf::BigIntVal is unused by first_val_rewrite_update() (it is used by the
|
||||
// AnalyticEvalNode).
|
||||
template <typename T>
|
||||
static void first_val_rewrite_update(doris_udf::FunctionContext*, const T& src,
|
||||
const doris_udf::BigIntVal&, T* dst);
|
||||
|
||||
// OffsetFn*() implement LAG and LEAD. Init() sets the default value (the last
|
||||
// constant parameter) as dst.
|
||||
template <typename T>
|
||||
static void offset_fn_init(doris_udf::FunctionContext*, T* dst);
|
||||
|
||||
// Update() takes all the parameters to LEAD/LAG, including the integer offset and
|
||||
// the default value, neither of which is needed by Update(). (The offset is already
|
||||
// used in the window for the analytic fn evaluation and the default value is set
|
||||
// in Init().)
|
||||
template <typename T>
|
||||
static void offset_fn_update(doris_udf::FunctionContext*, const T& src,
|
||||
const doris_udf::BigIntVal&, const T&, T* dst);
|
||||
|
||||
// windowFunnel
|
||||
static void window_funnel_init(FunctionContext* ctx, StringVal* dst);
|
||||
static void window_funnel_update(FunctionContext* ctx, const BigIntVal& window,
|
||||
const StringVal& mode, const DateTimeVal& timestamp,
|
||||
int num_cond, const BooleanVal* conds, StringVal* dst);
|
||||
static void window_funnel_merge(FunctionContext* ctx, const StringVal& src, StringVal* dst);
|
||||
static StringVal window_funnel_serialize(FunctionContext* ctx, const StringVal& src);
|
||||
static IntVal window_funnel_finalize(FunctionContext* ctx, const StringVal& src);
|
||||
|
||||
// todo(kks): keep following HLL methods only for backward compatibility, we should remove these methods
|
||||
// when doris 0.12 release
|
||||
static void hll_init(doris_udf::FunctionContext*, doris_udf::StringVal* slot);
|
||||
template <typename T>
|
||||
static void hll_update(doris_udf::FunctionContext*, const T& src, doris_udf::StringVal* dst);
|
||||
static void hll_merge(doris_udf::FunctionContext*, const doris_udf::StringVal& src,
|
||||
doris_udf::StringVal* dst);
|
||||
static doris_udf::StringVal hll_finalize(doris_udf::FunctionContext*,
|
||||
const doris_udf::StringVal& src);
|
||||
|
||||
static void hll_union_agg_init(doris_udf::FunctionContext*, doris_udf::HllVal* slot);
|
||||
// fill all register according to hll set type
|
||||
static void hll_union_agg_update(doris_udf::FunctionContext*, const doris_udf::HllVal& src,
|
||||
doris_udf::HllVal* dst);
|
||||
// merge the register value
|
||||
static void hll_union_agg_merge(doris_udf::FunctionContext*, const doris_udf::HllVal& src,
|
||||
doris_udf::HllVal* dst);
|
||||
// return result
|
||||
static doris_udf::BigIntVal hll_union_agg_finalize(doris_udf::FunctionContext*,
|
||||
const doris_udf::HllVal& src);
|
||||
|
||||
// calculate result
|
||||
static int64_t hll_algorithm(uint8_t* pdata, int data_len);
|
||||
static int64_t hll_algorithm(const StringVal& dst) { return hll_algorithm(dst.ptr, dst.len); }
|
||||
static int64_t hll_algorithm(const HllVal& dst) {
|
||||
return hll_algorithm(dst.ptr + 1, dst.len - 1);
|
||||
}
|
||||
|
||||
// HLL value type aggregate to HLL value type
|
||||
static void hll_raw_agg_init(doris_udf::FunctionContext*, doris_udf::HllVal* slot);
|
||||
static void hll_raw_agg_update(doris_udf::FunctionContext*, const doris_udf::HllVal& src,
|
||||
doris_udf::HllVal* dst);
|
||||
static void hll_raw_agg_merge(doris_udf::FunctionContext*, const doris_udf::HllVal& src,
|
||||
doris_udf::HllVal* dst);
|
||||
// return result which is HLL type
|
||||
static doris_udf::HllVal hll_raw_agg_finalize(doris_udf::FunctionContext*,
|
||||
const doris_udf::HllVal& src);
|
||||
};
|
||||
|
||||
} // namespace doris
|
||||
@ -1,65 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "exprs/time_operators.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
#include "exprs/anyval_util.h"
|
||||
#include "util/date_func.h"
|
||||
|
||||
namespace doris {
|
||||
// Initialization hook; the body is empty, so calling it has no effect.
void TimeOperators::init() {
}
|
||||
|
||||
#define CAST_TIME_TO_INT(to_type, type_name) \
|
||||
to_type TimeOperators::cast_to_##type_name(FunctionContext* context, const DoubleVal& val) { \
|
||||
if (val.is_null) return to_type::null(); \
|
||||
int time = (int)val.val; \
|
||||
int second = time % 60; \
|
||||
int minute = time / 60 % 60; \
|
||||
int hour = time / 3600; \
|
||||
return to_type(hour * 10000 + minute * 100 + second); \
|
||||
}
|
||||
|
||||
#define CAST_FROM_TIME() \
|
||||
CAST_TIME_TO_INT(BooleanVal, boolean_val); \
|
||||
CAST_TIME_TO_INT(TinyIntVal, tiny_int_val); \
|
||||
CAST_TIME_TO_INT(SmallIntVal, small_int_val); \
|
||||
CAST_TIME_TO_INT(IntVal, int_val); \
|
||||
CAST_TIME_TO_INT(BigIntVal, big_int_val); \
|
||||
CAST_TIME_TO_INT(LargeIntVal, large_int_val); \
|
||||
CAST_TIME_TO_INT(FloatVal, float_val); \
|
||||
CAST_TIME_TO_INT(DoubleVal, double_val);
|
||||
|
||||
CAST_FROM_TIME();
|
||||
|
||||
// Renders a time value (held in a DoubleVal) as text.
// A NULL input yields a NULL StringVal. Otherwise the value is formatted into a
// stack buffer of MAX_TIME_WIDTH bytes by time_to_buffer_from_double() and the
// resulting bytes are handed to AnyValUtil::from_buffer_temp() to build the result.
StringVal TimeOperators::cast_to_string_val(FunctionContext* ctx, const DoubleVal& val) {
    if (!val.is_null) {
        char text[MAX_TIME_WIDTH];
        const int text_len = time_to_buffer_from_double(val.val, text);
        return AnyValUtil::from_buffer_temp(ctx, text, text_len);
    }
    return StringVal::null();
}
|
||||
|
||||
// Cast from a time value to DATETIME.
// Both arguments are ignored: the function unconditionally returns a NULL DateTimeVal.
DateTimeVal TimeOperators::cast_to_datetime_val(FunctionContext* context, const DoubleVal& val) {
    // No conversion is performed for any input.
    return DateTimeVal::null();
}
|
||||
} // namespace doris
|
||||
@ -1,43 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "udf/udf.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
/// Implementation of the time operators. These include the cast,
|
||||
/// arithmetic and binary operators.
|
||||
class TimeOperators {
|
||||
public:
|
||||
static void init();
|
||||
|
||||
static BooleanVal cast_to_boolean_val(FunctionContext*, const DoubleVal&);
|
||||
static TinyIntVal cast_to_tiny_int_val(FunctionContext*, const DoubleVal&);
|
||||
static SmallIntVal cast_to_small_int_val(FunctionContext*, const DoubleVal&);
|
||||
static IntVal cast_to_int_val(FunctionContext*, const DoubleVal&);
|
||||
static BigIntVal cast_to_big_int_val(FunctionContext*, const DoubleVal&);
|
||||
static LargeIntVal cast_to_large_int_val(FunctionContext*, const DoubleVal&);
|
||||
static FloatVal cast_to_float_val(FunctionContext*, const DoubleVal&);
|
||||
static DoubleVal cast_to_double_val(FunctionContext*, const DoubleVal&);
|
||||
static StringVal cast_to_string_val(FunctionContext*, const DoubleVal&);
|
||||
static DateTimeVal cast_to_datetime_val(FunctionContext*, const DoubleVal&);
|
||||
};
|
||||
} // namespace doris
|
||||
@ -29,10 +29,8 @@ add_library(Olap STATIC
|
||||
base_tablet.cpp
|
||||
bloom_filter.hpp
|
||||
block_column_predicate.cpp
|
||||
byte_buffer.cpp
|
||||
compaction.cpp
|
||||
compaction_permit_limiter.cpp
|
||||
compress.cpp
|
||||
compaction_permit_limiter.cpp
|
||||
cumulative_compaction.cpp
|
||||
cumulative_compaction_policy.cpp
|
||||
delete_handler.cpp
|
||||
|
||||
@ -1,753 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stddef.h>
#include <string.h>
|
||||
|
||||
namespace doris {
|
||||
|
||||
/// Compares the first `count` bytes of two buffers as unsigned bytes.
///
/// @param buf1  First buffer; must be readable for `count` bytes.
/// @param buf2  Second buffer; must be readable for `count` bytes.
/// @param count Number of bytes to compare; 0 compares nothing and yields 0.
/// @return A negative value, zero, or a positive value when `buf1` orders before,
///         equal to, or after `buf2`. Only the sign of the result is meaningful.
inline int memcmp_sse(const void* buf1, const void* buf2, unsigned int count) {
    // Delegate to the C library instead of x86-64-only inline assembly so the
    // header builds on every target.
    return memcmp(buf1, buf2, count);
}
|
||||
|
||||
/// Compares the first `count` bytes of two buffers as unsigned bytes.
///
/// @param buf1  First buffer; must be readable for `count` bytes.
/// @param buf2  Second buffer; must be readable for `count` bytes.
/// @param count Number of bytes to compare, expected to be in [0, 2GB).
///              A zero or negative count compares nothing and yields 0.
/// @return A negative value, zero, or a positive value when `buf1` orders before,
///         equal to, or after `buf2`. Only the sign of the result is meaningful.
inline int memcmp_sse32(const void* buf1, const void* buf2, int count) {
    // Reject non-positive lengths up front so a negative count is never
    // reinterpreted as a huge unsigned length.
    if (count <= 0) {
        return 0;
    }
    // memcmp never touches bytes outside [buf, buf + count), unlike fixed
    // 16-byte vector loads.
    return memcmp(buf1, buf2, static_cast<size_t>(count));
}
|
||||
|
||||
/// Compares the first `count` bytes of two buffers as unsigned bytes.
///
/// @param buf1  First buffer; must be readable for `count` bytes.
/// @param buf2  Second buffer; must be readable for `count` bytes.
/// @param count Number of bytes to compare; the full size_t range is honoured.
///              0 compares nothing and yields 0.
/// @return A negative value, zero, or a positive value when `buf1` orders before,
///         equal to, or after `buf2`. Only the sign of the result is meaningful.
inline int memcmp_sse64(const void* buf1, const void* buf2, size_t count) {
    // memcmp takes the length as a full size_t, so large counts are not
    // truncated to 32 bits, and it never reads outside the two ranges.
    return memcmp(buf1, buf2, count);
}
|
||||
|
||||
/// Finds the first occurrence of a byte in a memory range.
///
/// @param s   Start of the range; must be readable for `len` bytes when `len > 0`.
/// @param c   Byte to look for; only its low 8 bits are used.
/// @param len Number of bytes to search. A zero or negative length searches nothing.
/// @return Zero-based index of the first matching byte, or -1 when there is none.
inline int find_chr_from_mem(const char* s, int c, int len) {
    // A non-positive length must not be searched: no byte is in range.
    if (len <= 0) {
        return -1;
    }
    // memchr converts `c` to unsigned char and stays inside [s, s + len).
    const void* hit = memchr(s, c, static_cast<size_t>(len));
    if (hit == nullptr) {
        return -1;
    }
    return static_cast<int>(static_cast<const char*>(hit) - s);
}
|
||||
|
||||
/// Finds the first occurrence of a character in a NUL-terminated string, looking at
/// no more than the first `len` characters.
///
/// @param s   NUL-terminated string to search.
/// @param c   Character to look for; only its low 8 bits are used. Searching for
///            the NUL character itself never matches and yields -1.
/// @param len Upper bound on the number of characters examined. It is interpreted
///            as an unsigned value, so a negative `len` acts as "no limit other
///            than the terminator".
/// @return Zero-based index of the first match located before both the terminator
///         and position `len`, or -1 when there is none.
inline int find_chr_from_str(const char* s, int c, int len) {
    const unsigned char wanted = static_cast<unsigned char>(c & 0xff);
    const unsigned int limit = static_cast<unsigned int>(len);
    for (unsigned int pos = 0; pos < limit; ++pos) {
        const unsigned char current = static_cast<unsigned char>(s[pos]);
        // Test for the terminator first: this stops the scan without reading
        // past the end of the string and makes a search for NUL report -1.
        if (current == 0) {
            break;
        }
        if (current == wanted) {
            return static_cast<int>(pos);
        }
    }
    return -1;
}
|
||||
|
||||
/// Finds the first occurrence of a character in a NUL-terminated string.
///
/// @param s NUL-terminated string to search.
/// @param c Character to look for; it is converted to `char`. Passing 0 locates
///          the terminator itself.
/// @return Pointer to the first match inside `s`, or nullptr when `c` does not occur.
inline char* strchr_sse(const char* s, int c) {
    // strchr stops at the terminator, so nothing beyond the string is read.
    // The const_cast only restores the non-const return type of this interface.
    return const_cast<char*>(strchr(s, c));
}
|
||||
|
||||
/// Finds the last occurrence of a character in a NUL-terminated string.
///
/// @param s NUL-terminated string to search.
/// @param c Character to look for; it is converted to `char`. Passing 0 locates
///          the terminator itself.
/// @return Pointer to the last match inside `s`, or nullptr when `c` does not occur.
inline char* strrchr_sse(const char* s, int c) {
    // strrchr stops at the terminator, so nothing beyond the string is read.
    // The const_cast only restores the non-const return type of this interface.
    return const_cast<char*>(strrchr(s, c));
}
|
||||
|
||||
/// Finds the last occurrence of a character in the half-open range [b, e).
///
/// @param b Start of the range. A null pointer yields nullptr.
/// @param e One past the last byte of the range. `b >= e` denotes an empty range.
/// @param c Byte to look for (0 is a valid value to search for).
/// @return Pointer to the last byte in [b, e) equal to `c`, or nullptr when the
///         range is empty or contains no such byte.
inline char* strrchr_end_sse(char const* b, char const* e, char c) {
    if (b == nullptr || b >= e) {
        return nullptr;
    }
    // Walk backwards from the end; only bytes inside [b, e) are ever read.
    for (char const* cursor = e; cursor != b;) {
        --cursor;
        if (*cursor == c) {
            return const_cast<char*>(cursor);
        }
    }
    return nullptr;
}
|
||||
|
||||
// Returns a pointer to the first byte equal to (unsigned char)c within the first
// n bytes of s, or nullptr when no such byte exists (same contract as memchr).
//
// The name is kept for source compatibility. The previous hand-written SSE4.2
// (pcmpestri) loop loaded 16 bytes per iteration regardless of n, so it could
// read up to 16 bytes past the end of the buffer (even for n == 0), and it handed
// only the low 32 bits of n to the instruction. The compiler builtin has neither
// problem and is not tied to x86-64.
inline void* memchr_sse(const void* s, int c, size_t n) {
    return __builtin_memchr(s, c, n);
}
|
||||
|
||||
// Returns the number of bytes in the NUL-terminated string s, not counting the
// terminator (same contract as strlen).
//
// The name is kept for source compatibility. The previous SSE4.2 (pcmpistri)
// loop always loaded whole 16-byte chunks starting at s, so it could read up to
// 15 bytes past the terminator. The compiler builtin avoids that and is not
// tied to x86-64.
inline size_t strlen_sse(const char* s) {
    return __builtin_strlen(s);
}
|
||||
|
||||
// Compares two NUL-terminated strings byte-wise as unsigned chars.
// Returns exactly -1, 0 or 1 (s1 < s2, s1 == s2, s1 > s2), which is what the
// previous assembly produced via cmova/cmovb.
//
// The name is kept for source compatibility. The previous SSE4.2 (pcmpistri)
// loop loaded 16 bytes from both strings per iteration and could therefore read
// past either terminator. The compiler builtin avoids that and is not tied to
// x86-64.
inline int strcmp_sse(const char* s1, const char* s2) {
    const int order = __builtin_strcmp(s1, s2);
    // Collapse the implementation-defined magnitude to the sign only.
    return (order > 0) - (order < 0);
}
|
||||
|
||||
// Compares at most n bytes of two NUL-terminated strings as unsigned chars.
// Returns 0 when the compared prefixes are equal (or n == 0); otherwise returns
// the difference s1[i] - s2[i] at the first differing position, matching the
// value the previous assembly computed.
//
// The name is kept for source compatibility. The previous SSE4.2 (pcmpistri)
// loop loaded 16 bytes from both inputs per iteration regardless of n or of the
// terminator position, so it could read past the end of either string. This
// version never touches a byte beyond min(n, first terminator).
inline int strncmp_sse(const char* s1, const char* s2, size_t n) {
    for (size_t i = 0; i < n; ++i) {
        const int lhs = static_cast<unsigned char>(s1[i]);
        const int rhs = static_cast<unsigned char>(s2[i]);
        if (lhs != rhs) {
            return lhs - rhs;
        }
        if (lhs == 0) {
            // Both strings end here and are identical up to this point.
            break;
        }
    }
    return 0;
}
|
||||
|
||||
// Folds `length` bytes starting at `src` into `crc` one byte at a time using the
// SSE4.2 crc32 instruction and returns the updated value. No initial or final
// inversion is applied here; that is left to the caller.
//
// The assembly loop tests its counter only after processing a byte, so a
// non-positive length used to wrap the counter and walk far past the buffer.
// Such lengths now return `crc` unchanged.
/*__attribute__((always_inline))*/ inline int baidu_crc32_byte(char const* src, int crc,
                                                               int length) {
    if (length <= 0) {
        return crc;
    }

    int crc_out;
    __asm__ __volatile__(
            "1:"
            "movzbl (%%rdi), %%ecx;"
            //"crc32b %%cl, %%esi;"  (emitted as raw bytes below)
            ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1;"

            "add $1, %%rdi;"
            "sub $1, %%edx;"
            "jnz 1b;"
            "movl %%esi,%%eax;"
            : "=a"(crc_out), "=D"(src), "=S"(crc), "=d"(length)
            : "D"(src), "S"(crc), "d"(length)
            : "memory", "ecx");

    return crc_out;
}
|
||||
|
||||
// Folds `qwlen` consecutive 8-byte words starting at `src` into `crc` using the
// 64-bit form of the SSE4.2 crc32 instruction and returns the updated value.
//
// The assembly loop tests its counter only after processing a word, so
// qwlen == 0 used to wrap the counter and walk far past the buffer. A zero count
// now returns `crc` unchanged.
inline int crc32c_qw(char const* src, int crc, unsigned int qwlen) {
    if (qwlen == 0) {
        return crc;
    }

    int crc_out;
    __asm__ __volatile__(
            "1:"
            // "crc32q (%%rdi), %%rsi;"  (emitted as raw bytes below)
            ".byte 0xf2 ,0x48 ,0x0f ,0x38 ,0xf1, 0x37;"

            "addq $8, %%rdi;"
            "subl $1, %%edx;"
            "jnz 1b;"
            "mov %%esi,%%eax;"
            : "=a"(crc_out), "=D"(src), "=S"(crc), "=d"(qwlen)
            : "D"(src), "S"(crc), "d"(qwlen)
            : "memory");
    return crc_out;
}
|
||||
|
||||
inline int baidu_crc32_qw(char const* src, int crc, unsigned int length) {
|
||||
unsigned int iquotient = length >> 3;
|
||||
unsigned int iremainder = length & 0x7;
|
||||
char const* p;
|
||||
|
||||
if (iquotient) {
|
||||
crc = crc32c_qw(src, crc, iquotient);
|
||||
}
|
||||
|
||||
if (iremainder) {
|
||||
p = src + (length - iremainder);
|
||||
crc = baidu_crc32_byte(p, crc, iremainder);
|
||||
}
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
} // namespace doris
|
||||
@ -1,204 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "byte_buffer.h"
|
||||
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include "olap/utils.h"
|
||||
#include "runtime/thread_context.h"
|
||||
|
||||
namespace doris {
|
||||
using namespace ErrorCode;
|
||||
|
||||
// Builds an empty buffer: no backing array, zero capacity/limit/position, and
// not marked as mmap-backed. The static factory functions fill in the rest.
StorageByteBuffer::StorageByteBuffer()
        : _array(nullptr),
          _capacity(0),
          _limit(0),
          _position(0),
          _is_mmap(false) {}
|
||||
|
||||
// A fresh deleter releases memory with delete[] until set_mmap() is called.
StorageByteBuffer::BufDeleter::BufDeleter()
        : _is_mmap(false),
          _mmap_length(0) {}
|
||||
|
||||
// Switches the deleter to munmap mode and records how many bytes to unmap.
void StorageByteBuffer::BufDeleter::set_mmap(size_t mmap_length) {
    _mmap_length = mmap_length;
    _is_mmap = true;
}
|
||||
|
||||
// Releases the memory block `p` owned by a StorageByteBuffer.
//  - nullptr is ignored;
//  - new[]-allocated memory is freed with delete[];
//  - mmap-backed memory is unmapped, and on success the mapped length is
//    returned to the thread memory tracker. A failed munmap is logged as FATAL.
void StorageByteBuffer::BufDeleter::operator()(char* p) {
    if (p == nullptr) {
        return;
    }

    if (!_is_mmap) {
        delete[] p;
        return;
    }

    if (munmap(p, _mmap_length) != 0) {
        // Cast to void*: streaming a char* would print the mapped bytes as a
        // C string (possibly unterminated) instead of the address.
        LOG(FATAL) << "fail to munmap: mem=" << static_cast<const void*>(p)
                   << ", len=" << _mmap_length << ", errno=" << Errno::no()
                   << ", errno_str=" << Errno::str();
    } else {
        RELEASE_THREAD_MEM_TRACKER(_mmap_length);
    }
}
|
||||
|
||||
// 创建ByteBuffer与array
|
||||
StorageByteBuffer* StorageByteBuffer::create(uint64_t capacity) {
|
||||
char* memory = new (std::nothrow) char[capacity];
|
||||
StorageByteBuffer* buf = new (std::nothrow) StorageByteBuffer;
|
||||
|
||||
if (buf != nullptr && memory != nullptr) {
|
||||
buf->_buf = std::shared_ptr<char>(memory, BufDeleter());
|
||||
buf->_array = buf->_buf.get();
|
||||
buf->_capacity = capacity;
|
||||
buf->_limit = capacity;
|
||||
return buf;
|
||||
}
|
||||
|
||||
SAFE_DELETE(buf);
|
||||
SAFE_DELETE_ARRAY(memory);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Creates a buffer that shares `reference`'s memory and exposes the window
// [offset, offset + length) of it. The new buffer has position 0 and
// limit == capacity == length; the caller owns the returned object.
// Returns nullptr when `reference` is null, `length` is 0, the window does not
// fit inside reference->capacity(), or the buffer object cannot be allocated.
StorageByteBuffer* StorageByteBuffer::reference_buffer(StorageByteBuffer* reference,
                                                       uint64_t offset, uint64_t length) {
    if (nullptr == reference || 0 == length) {
        return nullptr;
    }

    // Written without `offset + length` so a huge offset or length cannot wrap
    // around in uint64_t and slip past the bounds check.
    const uint64_t available = reference->capacity();
    if (offset > available || length > available - offset) {
        return nullptr;
    }

    StorageByteBuffer* buf = new (std::nothrow) StorageByteBuffer();
    if (nullptr == buf) {
        return nullptr;
    }

    // Sharing _buf keeps the underlying memory alive for as long as this view exists.
    buf->_buf = reference->_buf;
    buf->_array = &(reference->_array[offset]);
    buf->_capacity = length;
    buf->_limit = length;
    buf->_is_mmap = reference->_is_mmap;

    return buf;
}
|
||||
|
||||
// Maps memory with ::mmap(start, length, prot, flags, fd, offset) and wraps it in
// a StorageByteBuffer whose position is 0 and limit == capacity == length.
// `length` is charged to the thread memory tracker before mapping and given back
// on every failure path. Returns nullptr on failure; the caller owns the result.
StorageByteBuffer* StorageByteBuffer::mmap(void* start, uint64_t length, int prot, int flags,
                                           int fd, uint64_t offset) {
    CONSUME_THREAD_MEM_TRACKER(length);
    void* mapped = ::mmap(start, length, prot, flags, fd, offset);

    if (MAP_FAILED == mapped) {
        LOG(WARNING) << "fail to mmap. [errno='" << Errno::no() << "' errno_str='" << Errno::str()
                     << "']";
        RELEASE_THREAD_MEM_TRACKER(length);
        return nullptr;
    }

    char* memory = static_cast<char*>(mapped);

    BufDeleter deleter;
    deleter.set_mmap(length);

    StorageByteBuffer* buf = new (std::nothrow) StorageByteBuffer();

    if (nullptr == buf) {
        // The deleter unmaps the region and releases `length` from the tracker,
        // so it must not be released a second time here.
        deleter(memory);
        LOG(WARNING) << "fail to allocate StorageByteBuffer.";
        return nullptr;
    }

    buf->_buf = std::shared_ptr<char>(memory, deleter);
    buf->_array = buf->_buf.get();
    buf->_capacity = length;
    buf->_limit = length;
    buf->_is_mmap = true;
    return buf;
}
|
||||
|
||||
// Maps the file behind `handler` (handler->length() bytes of handler->fd(),
// starting at `offset`) and wraps it in a StorageByteBuffer whose position is 0
// and limit == capacity == the mapped length.
// The length is charged to the thread memory tracker before mapping and given
// back on every failure path. Returns nullptr on failure or when `handler` is
// null; the caller owns the result.
StorageByteBuffer* StorageByteBuffer::mmap(FileHandler* handler, uint64_t offset, int prot,
                                           int flags) {
    if (nullptr == handler) {
        LOG(WARNING) << "invalid file handler";
        return nullptr;
    }

    size_t length = handler->length();
    int fd = handler->fd();
    CONSUME_THREAD_MEM_TRACKER(length);
    void* mapped = ::mmap(nullptr, length, prot, flags, fd, offset);

    if (MAP_FAILED == mapped) {
        LOG(WARNING) << "fail to mmap. [errno='" << Errno::no() << "' errno_str='" << Errno::str()
                     << "']";
        RELEASE_THREAD_MEM_TRACKER(length);
        return nullptr;
    }

    char* memory = static_cast<char*>(mapped);

    BufDeleter deleter;
    deleter.set_mmap(length);

    StorageByteBuffer* buf = new (std::nothrow) StorageByteBuffer();

    if (nullptr == buf) {
        // The deleter unmaps the region and releases `length` from the tracker,
        // so it must not be released a second time here.
        deleter(memory);
        LOG(WARNING) << "fail to allocate StorageByteBuffer.";
        return nullptr;
    }

    buf->_buf = std::shared_ptr<char>(memory, deleter);
    buf->_array = buf->_buf.get();
    buf->_capacity = length;
    buf->_limit = length;
    buf->_is_mmap = true;
    return buf;
}
|
||||
|
||||
// Appends one byte at the current position and advances the position.
// Fails with BUFFER_OVERFLOW when the position has already reached the limit.
Status StorageByteBuffer::put(char src) {
    if (_position >= _limit) {
        return Status::Error<BUFFER_OVERFLOW>();
    }

    _array[_position] = src;
    ++_position;
    return Status::OK();
}
|
||||
|
||||
// Stores one byte at `index` without touching the position.
// Fails with BUFFER_OVERFLOW when `index` is at or beyond the limit.
Status StorageByteBuffer::put(uint64_t index, char src) {
    if (index >= _limit) {
        return Status::Error<BUFFER_OVERFLOW>();
    }

    _array[index] = src;
    return Status::OK();
}
|
||||
|
||||
// Copies `length` bytes from &src[offset] to the current position and advances
// the position by `length`.
// Returns:
//   BUFFER_OVERFLOW - fewer than `length` bytes remain in this buffer
//   OUT_OF_BOUND    - [offset, offset + length) does not fit inside src_size
Status StorageByteBuffer::put(const char* src, uint64_t src_size, uint64_t offset,
                              uint64_t length) {
    // Not enough space left to write.
    if (length > remaining()) {
        return Status::Error<BUFFER_OVERFLOW>();
    }

    // src is not large enough. Written without `offset + length` so the check
    // cannot be bypassed by uint64_t wrap-around.
    if (offset > src_size || length > src_size - offset) {
        return Status::Error<OUT_OF_BOUND>();
    }

    memory_copy(&_array[_position], &src[offset], length);
    _position += length;
    return Status::OK();
}
|
||||
|
||||
} // namespace doris
|
||||
@ -1,210 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "olap/file_helper.h"
|
||||
#include "olap/olap_define.h"
|
||||
#include "util/mem_util.hpp"
|
||||
|
||||
namespace doris {
|
||||
|
||||
// ByteBuffer is a class used for data caching
|
||||
// ByteBuffer maintains an internal char array for caching data;
|
||||
// ByteBuffer maintains internal Pointers for reading and writing data;
|
||||
//
|
||||
// ByteBuffer has the following important usage concepts:
|
||||
// capacity - the capacity of the buffer, set at initialization, is the size of the internal char array
|
||||
// position - the current internal pointer position
|
||||
// limit - maximum usage limit, this value is less than or equal to capacity, position is always less than limit
|
||||
//
|
||||
// ByteBuffer supports safe shallow copying of data directly using the copy constructor or = operator
|
||||
class StorageByteBuffer {
|
||||
public:
|
||||
// Create a StorageByteBuffer of capacity with the new method.
|
||||
// The position of the new buffer is 0, and the limit is capacity
|
||||
// The caller obtains the ownership of the newly created ByteBuffer, and needs to use delete method to delete the obtained StorageByteBuffer
|
||||
//
|
||||
// TODO. I think the use of create here should directly return the ByteBuffer itself instead of the smart pointer,
|
||||
// otherwise the smart pointer will not work,
|
||||
// and the current memory management is still manual.and need to think delete.
|
||||
static StorageByteBuffer* create(uint64_t capacity);
|
||||
|
||||
// Create a new StorageByteBuffer by referencing another ByteBuffer's memory
|
||||
// The position of the new buffer is 0, and the limit is length
|
||||
// The caller obtains the ownership of the newly created ByteBuffer, and needs to use delete method to delete the obtained StorageByteBuffer
|
||||
// Inputs:
|
||||
// - reference referenced memory
|
||||
// - offset The position of the referenced Buffer in the original ByteBuffer, i.e.&reference->array()[offset]
|
||||
// - length The length of the referenced Buffer
|
||||
// Notes:
|
||||
// offset + length < reference->capacity
|
||||
//
|
||||
// TODO. same as create
|
||||
static StorageByteBuffer* reference_buffer(StorageByteBuffer* reference, uint64_t offset,
|
||||
uint64_t length);
|
||||
|
||||
// Create a ByteBuffer through mmap, and the memory after successful mmap is managed by ByteBuffer
|
||||
// start, length, prot, flags, fd, offset are all parameters of mmap function
|
||||
// The caller obtains the ownership of the newly created ByteBuffer, and needs to use delete method to delete the obtained StorageByteBuffer
|
||||
static StorageByteBuffer* mmap(void* start, uint64_t length, int prot, int flags, int fd,
|
||||
uint64_t offset);
|
||||
|
||||
// Since olap files are encapsulated with FileHandler, the interface is slightly modified
|
||||
// and the omitted parameters can be obtained in the handler.
|
||||
// The old interface is still preserved, maybe it will be used?
|
||||
static StorageByteBuffer* mmap(FileHandler* handler, uint64_t offset, int prot, int flags);
|
||||
|
||||
uint64_t capacity() const { return _capacity; }
|
||||
|
||||
uint64_t position() const { return _position; }
|
||||
// Set the position of the internal pointer
|
||||
// If the new position is greater than or equal to limit, return Status::Error<ErrorCode::INVALID_ARGUMENT>()
|
||||
Status set_position(uint64_t new_position) {
|
||||
if (new_position <= _limit) {
|
||||
_position = new_position;
|
||||
return Status::OK();
|
||||
} else {
|
||||
return Status::Error<ErrorCode::INVALID_ARGUMENT>();
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t limit() const { return _limit; }
|
||||
//set new limit
|
||||
//If limit is greater than capacity, return Status::Error<ErrorCode::INVALID_ARGUMENT>()
|
||||
//If position is greater than the new limit, set position equal to limit
|
||||
Status set_limit(uint64_t new_limit) {
|
||||
if (new_limit > _capacity) {
|
||||
return Status::Error<ErrorCode::INVALID_ARGUMENT>();
|
||||
}
|
||||
|
||||
_limit = new_limit;
|
||||
|
||||
if (_position > _limit) {
|
||||
_position = _limit;
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
uint64_t remaining() const { return _limit - _position; }
|
||||
|
||||
// Set limit to current position
|
||||
// set position to 0
|
||||
// This function can be used to change the ByteBuffer from the write state to the read state,
|
||||
// that is, call this function after some writes, and then read the ByteBuffer.
|
||||
void flip() {
|
||||
_limit = _position;
|
||||
_position = 0;
|
||||
}
|
||||
|
||||
// The following three read functions are inline optimized
|
||||
|
||||
// Read one byte of data, increase position after completion
|
||||
Status get(char* result) {
|
||||
if (OLAP_LIKELY(_position < _limit)) {
|
||||
*result = _array[_position++];
|
||||
return Status::OK();
|
||||
} else {
|
||||
return Status::Error<ErrorCode::OUT_OF_BOUND>();
|
||||
}
|
||||
}
|
||||
|
||||
// Read one byte of data at the specified location
|
||||
Status get(uint64_t index, char* result) {
|
||||
if (OLAP_LIKELY(index < _limit)) {
|
||||
*result = _array[index];
|
||||
return Status::OK();
|
||||
} else {
|
||||
return Status::Error<ErrorCode::OUT_OF_BOUND>();
|
||||
}
|
||||
}
|
||||
|
||||
// Read a piece of data of length length to dst, and increase the position after completion
|
||||
Status get(char* dst, uint64_t dst_size, uint64_t length) {
|
||||
// Not enough data to read
|
||||
if (OLAP_UNLIKELY(length > remaining())) {
|
||||
return Status::Error<ErrorCode::OUT_OF_BOUND>();
|
||||
}
|
||||
|
||||
// dst is not big enough
|
||||
if (OLAP_UNLIKELY(length > dst_size)) {
|
||||
return Status::Error<ErrorCode::BUFFER_OVERFLOW>();
|
||||
}
|
||||
|
||||
memory_copy(dst, &_array[_position], length);
|
||||
_position += length;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Read dst_size long data to dst
|
||||
Status get(char* dst, uint64_t dst_size) { return get(dst, dst_size, dst_size); }
|
||||
|
||||
// Write a byte, increment position when done
|
||||
// If position >= limit before writing, return Status::Error<ErrorCode::BUFFER_OVERFLOW>()
|
||||
Status put(char src);
|
||||
|
||||
// Write data at the index position without changing the position
|
||||
// Returns:
|
||||
// Status::Error<ErrorCode::BUFFER_OVERFLOW>() : index >= limit
|
||||
Status put(uint64_t index, char src);
|
||||
|
||||
// Read length bytes from &src[offset], write to buffer, and increase position after completion
|
||||
// Returns:
|
||||
// Status::Error<ErrorCode::BUFFER_OVERFLOW>(): remaining() < length
|
||||
// Status::Error<ErrorCode::OUT_OF_BOUND>(): offset + length > src_size
|
||||
Status put(const char* src, uint64_t src_size, uint64_t offset, uint64_t length);
|
||||
|
||||
// write a set of data
|
||||
Status put(const char* src, uint64_t src_size) { return put(src, src_size, 0, src_size); }
|
||||
|
||||
// Returns the char array inside the ByteBuffer
|
||||
const char* array() const { return _array; }
|
||||
const char* array(size_t position) const {
|
||||
return position >= _limit ? nullptr : &_array[position];
|
||||
}
|
||||
char* array() { return _array; }
|
||||
|
||||
private:
|
||||
// A custom destructor class that supports destructing the memory of new[] and mmap
|
||||
// Use delete to release by default
|
||||
class BufDeleter {
|
||||
public:
|
||||
BufDeleter();
|
||||
// Set to use mmap method
|
||||
void set_mmap(size_t mmap_length);
|
||||
void operator()(char* p);
|
||||
|
||||
private:
|
||||
bool _is_mmap; // whether to use mmap
|
||||
size_t _mmap_length; // If mmap is used, record the length of mmap
|
||||
};
|
||||
|
||||
private:
|
||||
// Direct creation of ByteBuffer is not supported, but created through the create method
|
||||
StorageByteBuffer();
|
||||
|
||||
private:
|
||||
std::shared_ptr<char> _buf; // managed memory
|
||||
char* _array;
|
||||
uint64_t _capacity;
|
||||
uint64_t _limit;
|
||||
uint64_t _position;
|
||||
bool _is_mmap;
|
||||
};
|
||||
|
||||
} // namespace doris
|
||||
@ -1,91 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "compress.h"
|
||||
|
||||
#include "olap/byte_buffer.h"
|
||||
#include "olap/utils.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
#ifdef DORIS_WITH_LZO
|
||||
// Compresses the bytes between position and limit of `in` into the free space of
// `out` using OLAP_COMP_STORAGE.
// `*smaller` is set to true, and out's position is advanced past the compressed
// bytes, only when the compressed size is smaller than the input size. Otherwise
// `*smaller` stays false and out's position is left untouched.
// Returns the status of olap_compress, or the error from advancing out's
// position (which used to be silently dropped).
Status lzo_compress(StorageByteBuffer* in, StorageByteBuffer* out, bool* smaller) {
    *smaller = false;

    size_t out_length = 0;
    Status res = olap_compress(&(in->array()[in->position()]), in->remaining(),
                               &(out->array()[out->position()]), out->remaining(), &out_length,
                               OLAP_COMP_STORAGE);
    if (!res.ok() || out_length >= in->remaining()) {
        return res;
    }

    res = out->set_position(out->position() + out_length);
    if (res.ok()) {
        *smaller = true;
    }
    return res;
}
|
||||
|
||||
// Decompresses the bytes between position and limit of `in` into the free space
// of `out` using OLAP_COMP_STORAGE, then sets out's limit to the end of the
// decompressed data.
// The data is written starting at out->position(), so the new limit is
// position + out_length. For the usual position == 0 case this equals the
// previous behavior of using out_length alone.
// Returns the status of olap_decompress, or the error from set_limit (which used
// to be silently dropped).
Status lzo_decompress(StorageByteBuffer* in, StorageByteBuffer* out) {
    size_t out_length = 0;
    Status res = olap_decompress(&(in->array()[in->position()]), in->remaining(),
                                 &(out->array()[out->position()]), out->remaining(), &out_length,
                                 OLAP_COMP_STORAGE);
    if (!res.ok()) {
        return res;
    }

    return out->set_limit(out->position() + out_length);
}
|
||||
#endif
|
||||
|
||||
// Compresses the bytes between position and limit of `in` into the free space of
// `out` using OLAP_COMP_LZ4.
// `*smaller` is set to true, and out's position is advanced past the compressed
// bytes, only when the compressed size is smaller than the input size. Otherwise
// `*smaller` stays false and out's position is left untouched.
// Returns the status of olap_compress, or the error from advancing out's
// position (which used to be silently dropped).
Status lz4_compress(StorageByteBuffer* in, StorageByteBuffer* out, bool* smaller) {
    *smaller = false;

    size_t out_length = 0;
    Status res = olap_compress(&(in->array()[in->position()]), in->remaining(),
                               &(out->array()[out->position()]), out->remaining(), &out_length,
                               OLAP_COMP_LZ4);
    if (!res.ok() || out_length >= in->remaining()) {
        return res;
    }

    res = out->set_position(out->position() + out_length);
    if (res.ok()) {
        *smaller = true;
    }
    return res;
}
|
||||
|
||||
// Decompresses the bytes between position and limit of `in` into the free space
// of `out` using OLAP_COMP_LZ4, then sets out's limit to the end of the
// decompressed data.
// The data is written starting at out->position(), so the new limit is
// position + out_length. For the usual position == 0 case this equals the
// previous behavior of using out_length alone.
// Returns the status of olap_decompress, or the error from set_limit (which used
// to be silently dropped).
Status lz4_decompress(StorageByteBuffer* in, StorageByteBuffer* out) {
    size_t out_length = 0;
    Status res = olap_decompress(&(in->array()[in->position()]), in->remaining(),
                                 &(out->array()[out->position()]), out->remaining(), &out_length,
                                 OLAP_COMP_LZ4);
    if (!res.ok()) {
        return res;
    }

    return out->set_limit(out->position() + out_length);
}
|
||||
|
||||
} // namespace doris
|
||||
@ -1,55 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "olap/olap_define.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
class StorageByteBuffer;
|
||||
|
||||
// Define a compression function to compress the remaining memory in the input buffer
|
||||
// and save it to the remaining space in the output buffer
|
||||
// Inputs:
|
||||
// in - input buffer,Compress memory from position to limit
|
||||
// out - output buffer,The space from position to limit can be used to store data
|
||||
// smaller - Whether the compressed data size is smaller than the data size before compression
|
||||
// Returns:
|
||||
// Status::Error<BUFFER_OVERFLOW>() - Insufficient space left in output buffer
|
||||
// Status::Error<COMPRESS_ERROR>() - Compression error
|
||||
typedef Status (*Compressor)(StorageByteBuffer* in, StorageByteBuffer* out, bool* smaller);
|
||||
|
||||
// Define a decompression function to decompress the remaining memory in the input buffer
|
||||
// and save it to the remaining space in the output buffer
|
||||
// Inputs:
|
||||
// in - input buffer,Decompress memory from position to limit
|
||||
// out - output buffer,The space from position to limit can be used to store data
|
||||
// Returns:
|
||||
// Status::Error<BUFFER_OVERFLOW>() - Insufficient space left in output buffer
|
||||
// Status::Error<DECOMPRESS_ERROR>() - decompression error
|
||||
typedef Status (*Decompressor)(StorageByteBuffer* in, StorageByteBuffer* out);
|
||||
|
||||
#ifdef DORIS_WITH_LZO
|
||||
Status lzo_compress(StorageByteBuffer* in, StorageByteBuffer* out, bool* smaller);
|
||||
Status lzo_decompress(StorageByteBuffer* in, StorageByteBuffer* out);
|
||||
#endif
|
||||
|
||||
Status lz4_compress(StorageByteBuffer* in, StorageByteBuffer* out, bool* smaller);
|
||||
Status lz4_decompress(StorageByteBuffer* in, StorageByteBuffer* out);
|
||||
|
||||
} // namespace doris
|
||||
@ -542,18 +542,6 @@ unsigned int crc32c_lut(char const* b, unsigned int off, unsigned int len, unsig
|
||||
return localCrc;
|
||||
}
|
||||
|
||||
uint32_t olap_crc32(uint32_t crc32, const char* buf, size_t len) {
|
||||
#if defined(__i386) || defined(__x86_64__)
|
||||
if (OLAP_LIKELY(CpuInfo::is_supported(CpuInfo::SSE4_2))) {
|
||||
return baidu_crc32_qw(buf, crc32, len);
|
||||
} else {
|
||||
return crc32c_lut(buf, 0, len, crc32);
|
||||
}
|
||||
#else
|
||||
return crc32c_lut(buf, 0, len, crc32);
|
||||
#endif
|
||||
}
|
||||
|
||||
Status gen_timestamp_string(string* out_string) {
|
||||
time_t now = time(nullptr);
|
||||
tm local_tm;
|
||||
|
||||
@ -35,9 +35,6 @@
|
||||
#include <vector>
|
||||
|
||||
#include "common/logging.h"
|
||||
#if defined(__i386) || defined(__x86_64__)
|
||||
#include "olap/bhp_lib.h"
|
||||
#endif
|
||||
#include "olap/olap_common.h"
|
||||
#include "olap/olap_define.h"
|
||||
|
||||
@ -126,26 +123,9 @@ Status olap_decompress(const char* src_buf, size_t src_len, char* dest_buf, size
|
||||
#define ADLER32_INIT adler32(0L, Z_NULL, 0)
|
||||
uint32_t olap_adler32(uint32_t adler, const char* buf, size_t len);
|
||||
|
||||
// CRC32 is used only for RowBlock checksums; it performs very well.
|
||||
#define CRC32_INIT 0xFFFFFFFF
|
||||
uint32_t olap_crc32(uint32_t crc32, const char* buf, size_t len);
|
||||
|
||||
// Gets the current system time and converts it to a string.
|
||||
Status gen_timestamp_string(std::string* out_string);
|
||||
|
||||
// Selects which comparison a comparator performs.
enum ComparatorEnum {
    // "less than" comparison
    COMPARATOR_LESS = 0,
    // "larger than" comparison
    COMPARATOR_LARGER = 1,
};
|
||||
|
||||
// Signals an error raised while a comparator functor is being evaluated.
class ComparatorException : public std::exception {
public:
    // `noexcept override` replaces the deprecated `throw()` specification
    // (removed in C++20) and lets the compiler verify that this really
    // overrides std::exception::what().
    const char* what() const noexcept override {
        return "exception happens when doing binary search.";
    }
};
|
||||
|
||||
// Iterator offset, used for binary search.
|
||||
using iterator_offset_t = size_t;
|
||||
|
||||
|
||||
@ -24,7 +24,6 @@
|
||||
|
||||
#include "common/logging.h"
|
||||
#include "common/utils.h"
|
||||
#include "exec/arrow/orc_reader.h"
|
||||
#include "exec/text_converter.hpp"
|
||||
#include "olap/iterators.h"
|
||||
#include "runtime/descriptors.h"
|
||||
|
||||
@ -19,7 +19,6 @@
|
||||
|
||||
#include <arrow/array.h>
|
||||
#include <exec/arrow/arrow_reader.h>
|
||||
#include <exec/arrow/orc_reader.h>
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
|
||||
@ -54,15 +54,12 @@ set(EXPRS_TEST_FILES
|
||||
exprs/json_function_test.cpp
|
||||
exprs/string_functions_test.cpp
|
||||
exprs/timestamp_functions_test.cpp
|
||||
exprs/percentile_approx_test.cpp
|
||||
exprs/percentile_test.cpp
|
||||
exprs/bitmap_function_test.cpp
|
||||
exprs/encryption_functions_test.cpp
|
||||
exprs/math_functions_test.cpp
|
||||
exprs/topn_function_test.cpp
|
||||
exprs/bloom_filter_predicate_test.cpp
|
||||
exprs/quantile_function_test.cpp
|
||||
exprs/window_funnel_test.cpp
|
||||
exprs/hash_function_test.cpp
|
||||
)
|
||||
set(GEO_TEST_FILES
|
||||
@ -91,7 +88,6 @@ set(OLAP_TEST_FILES
|
||||
olap/tablet_schema_helper.cpp
|
||||
olap/delta_writer_test.cpp
|
||||
olap/delete_handler_test.cpp
|
||||
olap/byte_buffer_test.cpp
|
||||
olap/lru_cache_test.cpp
|
||||
olap/bloom_filter_test.cpp
|
||||
olap/itoken_extractor_test.cpp
|
||||
|
||||
@ -24,7 +24,6 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "exprs/aggregate_functions.h"
|
||||
#include "exprs/anyval_util.h"
|
||||
#include "testutil/function_utils.h"
|
||||
#include "util/bitmap_intersect.h"
|
||||
|
||||
@ -1,142 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "exprs/aggregate_functions.h"
|
||||
#include "testutil/function_utils.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Stateless gtest fixture that groups the percentile_approx aggregate-function tests.
class PercentileApproxTest : public testing::Test {
public:
    PercentileApproxTest() = default;
};
|
||||
|
||||
// Updates a percentile_approx state with the values 1 and 2 at quantile 0.9, passes it
// through serialize -> init -> merge, and expects the finalized result to be 2.
TEST_F(PercentileApproxTest, testSample) {
    auto* utils = new FunctionUtils();
    doris_udf::FunctionContext* ctx = utils->get_fn_ctx();

    DoubleVal quantile(0.9);

    // Source state: receives both samples.
    StringVal src_state;
    AggregateFunctions::percentile_approx_init(ctx, &src_state);
    for (int sample = 1; sample <= 2; ++sample) {
        AggregateFunctions::percentile_approx_update(ctx, DoubleVal(sample), quantile,
                                                     &src_state);
    }
    StringVal wire = AggregateFunctions::percentile_approx_serialize(ctx, src_state);

    // Destination state: receives the serialized source through merge.
    StringVal dst_state;
    AggregateFunctions::percentile_approx_init(ctx, &dst_state);
    AggregateFunctions::percentile_approx_merge(ctx, wire, &dst_state);

    DoubleVal result = AggregateFunctions::percentile_approx_finalize(ctx, dst_state);
    EXPECT_EQ(result.val, 2);

    delete utils;
}
|
||||
|
||||
// Same input as testSample (values 1 and 2, quantile 0.9) but finalizes the updated
// state directly, with no serialize/merge step in between.
TEST_F(PercentileApproxTest, testNoMerge) {
    auto* utils = new FunctionUtils();
    doris_udf::FunctionContext* ctx = utils->get_fn_ctx();

    DoubleVal quantile(0.9);

    StringVal state;
    AggregateFunctions::percentile_approx_init(ctx, &state);
    for (int sample = 1; sample <= 2; ++sample) {
        AggregateFunctions::percentile_approx_update(ctx, DoubleVal(sample), quantile, &state);
    }

    DoubleVal result = AggregateFunctions::percentile_approx_finalize(ctx, state);
    EXPECT_EQ(result.val, 2);

    delete utils;
}
|
||||
|
||||
// Feeds 1..100000 at quantile 0.999 and checks the result after a serialize/merge round
// trip, then repeats the exercise while additionally merging a freshly initialized
// (never updated) state, which must leave the result unchanged.
TEST_F(PercentileApproxTest, testSerialize) {
    auto* utils = new FunctionUtils();
    doris_udf::FunctionContext* ctx = utils->get_fn_ctx();

    DoubleVal quantile(0.999);
    StringVal state;

    // (Re)initializes `state`, feeds it 1..100000 and returns its serialized form.
    auto fill_and_serialize = [&]() {
        AggregateFunctions::percentile_approx_init(ctx, &state);
        for (int value = 1; value <= 100000; ++value) {
            DoubleVal sample(value);
            AggregateFunctions::percentile_approx_update(ctx, sample, quantile, &state);
        }
        return AggregateFunctions::percentile_approx_serialize(ctx, state);
    };

    StringVal serialized = fill_and_serialize();

    // Simulate the receiving side: merge the serialized state into a fresh one.
    StringVal merged;
    AggregateFunctions::percentile_approx_init(ctx, &merged);
    AggregateFunctions::percentile_approx_merge(ctx, serialized, &merged);
    DoubleVal result = AggregateFunctions::percentile_approx_finalize(ctx, merged);
    EXPECT_DOUBLE_EQ(result.val, 99900.5);

    // Second round: also merging an init-only state must not alter the answer.
    serialized = fill_and_serialize();

    StringVal untouched;
    AggregateFunctions::percentile_approx_init(ctx, &merged);
    AggregateFunctions::percentile_approx_init(ctx, &untouched);
    StringVal untouched_serialized =
            AggregateFunctions::percentile_approx_serialize(ctx, untouched);

    AggregateFunctions::percentile_approx_merge(ctx, serialized, &merged);
    AggregateFunctions::percentile_approx_merge(ctx, untouched_serialized, &merged);
    result = AggregateFunctions::percentile_approx_finalize(ctx, merged);
    EXPECT_DOUBLE_EQ(result.val, 99900.5);

    delete utils;
}
|
||||
|
||||
// Feeds 1..100000 at quantile 0.999, passing a null DoubleVal for every multiple of 3,
// then checks the finalized value after a serialize/merge round trip.
TEST_F(PercentileApproxTest, testNullVale) {
    auto* utils = new FunctionUtils();
    doris_udf::FunctionContext* ctx = utils->get_fn_ctx();

    DoubleVal quantile(0.999);
    StringVal state;
    AggregateFunctions::percentile_approx_init(ctx, &state);

    for (int value = 1; value <= 100000; ++value) {
        // Every third input is a null value; the rest carry the loop counter.
        const bool use_null = (value % 3 == 0);
        DoubleVal sample = use_null ? DoubleVal::null() : DoubleVal(value);
        AggregateFunctions::percentile_approx_update(ctx, sample, quantile, &state);
    }
    StringVal serialized = AggregateFunctions::percentile_approx_serialize(ctx, state);

    // Simulate the receiving side: merge the serialized state into a fresh one.
    StringVal merged;
    AggregateFunctions::percentile_approx_init(ctx, &merged);
    AggregateFunctions::percentile_approx_merge(ctx, serialized, &merged);
    DoubleVal result = AggregateFunctions::percentile_approx_finalize(ctx, merged);
    EXPECT_FLOAT_EQ(result.val, 99900.665999999997);

    delete utils;
}
|
||||
|
||||
} // namespace doris
|
||||
@ -1,114 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "exprs/aggregate_functions.h"
|
||||
#include "testutil/function_utils.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Stateless gtest fixture that groups the exact-percentile aggregate-function tests.
class PercentileTest : public testing::Test {
public:
    PercentileTest() = default;
};
|
||||
|
||||
// Updates a percentile state with the values 1 and 2 at quantile 0.9, passes it through
// serialize -> init -> merge, and expects the finalized result to be 1.9.
TEST_F(PercentileTest, testSample) {
    FunctionUtils* futil = new FunctionUtils();
    doris_udf::FunctionContext* context = futil->get_fn_ctx();

    DoubleVal doubleQ(0.9);

    StringVal stringVal1;
    BigIntVal int1(1);
    AggregateFunctions::percentile_init(context, &stringVal1);
    AggregateFunctions::percentile_update(context, int1, doubleQ, &stringVal1);
    BigIntVal int2(2);
    AggregateFunctions::percentile_update(context, int2, doubleQ, &stringVal1);

    StringVal s = AggregateFunctions::percentile_serialize(context, stringVal1);

    StringVal stringVal2;
    AggregateFunctions::percentile_init(context, &stringVal2);
    AggregateFunctions::percentile_merge(context, s, &stringVal2);
    DoubleVal v = AggregateFunctions::percentile_finalize(context, stringVal2);
    // The result is a computed double, so compare with ULP tolerance rather than exact
    // equality (consistent with testSerialize in this file).
    EXPECT_DOUBLE_EQ(v.val, 1.9);
    delete futil;
}
|
||||
|
||||
// Same input as testSample (values 1 and 2, quantile 0.9) but finalizes the updated
// state directly, with no serialize/merge step in between.
TEST_F(PercentileTest, testNoMerge) {
    FunctionUtils* futil = new FunctionUtils();
    doris_udf::FunctionContext* context = futil->get_fn_ctx();

    DoubleVal doubleQ(0.9);

    StringVal stringVal1;
    BigIntVal val(1);
    AggregateFunctions::percentile_init(context, &stringVal1);
    AggregateFunctions::percentile_update(context, val, doubleQ, &stringVal1);
    BigIntVal val2(2);
    AggregateFunctions::percentile_update(context, val2, doubleQ, &stringVal1);

    DoubleVal v = AggregateFunctions::percentile_finalize(context, stringVal1);
    // The result is a computed double, so compare with ULP tolerance rather than exact
    // equality (consistent with testSerialize in this file).
    EXPECT_DOUBLE_EQ(v.val, 1.9);
    delete futil;
}
|
||||
|
||||
// Feeds 1..100000 at quantile 0.999 and checks the result after a serialize/merge round
// trip, then repeats the exercise while additionally merging a freshly initialized
// (never updated) state, which must leave the result unchanged.
TEST_F(PercentileTest, testSerialize) {
    auto* utils = new FunctionUtils();
    doris_udf::FunctionContext* ctx = utils->get_fn_ctx();

    DoubleVal quantile(0.999);
    StringVal state;

    // (Re)initializes `state`, feeds it 1..100000 and returns its serialized form.
    auto fill_and_serialize = [&]() {
        AggregateFunctions::percentile_init(ctx, &state);
        for (int value = 1; value <= 100000; ++value) {
            BigIntVal sample(value);
            AggregateFunctions::percentile_update(ctx, sample, quantile, &state);
        }
        return AggregateFunctions::percentile_serialize(ctx, state);
    };

    StringVal serialized = fill_and_serialize();

    // Simulate the receiving side: merge the serialized state into a fresh one.
    StringVal merged;
    AggregateFunctions::percentile_init(ctx, &merged);
    AggregateFunctions::percentile_merge(ctx, serialized, &merged);
    DoubleVal result = AggregateFunctions::percentile_finalize(ctx, merged);
    EXPECT_DOUBLE_EQ(result.val, 99900.001);

    // Second round: also merging an init-only state must not alter the answer.
    serialized = fill_and_serialize();

    StringVal untouched;
    AggregateFunctions::percentile_init(ctx, &merged);
    AggregateFunctions::percentile_init(ctx, &untouched);
    StringVal untouched_serialized = AggregateFunctions::percentile_serialize(ctx, untouched);

    AggregateFunctions::percentile_merge(ctx, serialized, &merged);
    AggregateFunctions::percentile_merge(ctx, untouched_serialized, &merged);
    result = AggregateFunctions::percentile_finalize(ctx, merged);
    EXPECT_DOUBLE_EQ(result.val, 99900.001);

    delete utils;
}
|
||||
|
||||
} // namespace doris
|
||||
@ -1,425 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "common/logging.h"
|
||||
#include "exprs/aggregate_functions.h"
|
||||
#include "runtime/datetime_value.h"
|
||||
#include "testutil/function_utils.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
// Stateless gtest fixture that groups the window_funnel aggregate-function tests.
class WindowFunnelTest : public testing::Test {
public:
    WindowFunnelTest() = default;
};
|
||||
|
||||
// Feeds four events with increasing timestamps, each matching the next funnel condition
// in turn, and checks the finalized funnel depth for window values -1 .. NUM_CONDS + 3.
// The state is finalized directly (no serialize/merge step).
TEST_F(WindowFunnelTest, testMax4SortedNoMerge) {
    FunctionUtils* futil = new FunctionUtils();
    doris_udf::FunctionContext* context = futil->get_fn_ctx();

    const int NUM_CONDS = 4;
    for (int i = -1; i < NUM_CONDS + 4; i++) {
        StringVal stringVal1;
        BigIntVal window(i);
        StringVal mode("default");
        // Register window and mode as the context's constant arguments before init.
        std::vector<doris_udf::AnyVal*> constant_args;
        constant_args.emplace_back(&window);
        constant_args.emplace_back(&mode);
        context->impl()->set_constant_args(std::move(constant_args));

        AggregateFunctions::window_funnel_init(context, &stringVal1);

        DateTimeVal timestamp;
        DateTimeValue time_value;
        // Adds one event on 2020-02-28 whose sixth set_time field is `sec` (presumably the
        // seconds component) with the given per-condition flags.
        auto add_event = [&](int sec, bool c0, bool c1, bool c2, bool c3) {
            time_value.set_time(2020, 2, 28, 0, 0, sec, 0);
            time_value.to_datetime_val(&timestamp);
            BooleanVal conds[NUM_CONDS] = {c0, c1, c2, c3};
            AggregateFunctions::window_funnel_update(context, window, mode, timestamp, NUM_CONDS,
                                                     conds, &stringVal1);
        };

        add_event(1, true, false, false, false);
        add_event(2, false, true, false, false);
        add_event(3, false, false, true, false);
        add_event(4, false, false, false, true);

        IntVal v = AggregateFunctions::window_funnel_finalize(context, stringVal1);
        LOG(INFO) << "event num: " << NUM_CONDS << " window: " << window.val;
        EXPECT_EQ(v.val, i < 0 ? 1 : (i < NUM_CONDS ? i + 1 : NUM_CONDS));
    }
    delete futil;
}
|
||||
|
||||
// Same events as testMax4SortedNoMerge, but the state is passed through
// serialize -> init -> merge before being finalized.
TEST_F(WindowFunnelTest, testMax4SortedMerge) {
    FunctionUtils* futil = new FunctionUtils();
    doris_udf::FunctionContext* context = futil->get_fn_ctx();

    const int NUM_CONDS = 4;
    for (int i = -1; i < NUM_CONDS + 4; i++) {
        StringVal stringVal1;
        BigIntVal window(i);
        StringVal mode("default");
        // Register window and mode as the context's constant arguments before init.
        std::vector<doris_udf::AnyVal*> constant_args;
        constant_args.emplace_back(&window);
        constant_args.emplace_back(&mode);
        context->impl()->set_constant_args(std::move(constant_args));

        AggregateFunctions::window_funnel_init(context, &stringVal1);

        DateTimeVal timestamp;
        DateTimeValue time_value;
        // Adds one event on 2020-02-28 whose sixth set_time field is `sec` (presumably the
        // seconds component) with the given per-condition flags.
        auto add_event = [&](int sec, bool c0, bool c1, bool c2, bool c3) {
            time_value.set_time(2020, 2, 28, 0, 0, sec, 0);
            time_value.to_datetime_val(&timestamp);
            BooleanVal conds[NUM_CONDS] = {c0, c1, c2, c3};
            AggregateFunctions::window_funnel_update(context, window, mode, timestamp, NUM_CONDS,
                                                     conds, &stringVal1);
        };

        add_event(1, true, false, false, false);
        add_event(2, false, true, false, false);
        add_event(3, false, false, true, false);
        add_event(4, false, false, false, true);

        StringVal s = AggregateFunctions::window_funnel_serialize(context, stringVal1);

        // Merge the serialized state into a freshly initialized one and finalize that.
        StringVal stringVal2;
        AggregateFunctions::window_funnel_init(context, &stringVal2);
        AggregateFunctions::window_funnel_merge(context, s, &stringVal2);
        IntVal v = AggregateFunctions::window_funnel_finalize(context, stringVal2);
        LOG(INFO) << "event num: " << NUM_CONDS << " window: " << window.val;
        EXPECT_EQ(v.val, i < 0 ? 1 : (i < NUM_CONDS ? i + 1 : NUM_CONDS));
    }
    delete futil;
}
|
||||
|
||||
// Feeds four events whose timestamps decrease while the matched condition index
// increases, and expects a funnel depth of 1 for every tested window value.
// The state is finalized directly (no serialize/merge step).
TEST_F(WindowFunnelTest, testMax4ReverseSortedNoMerge) {
    FunctionUtils* futil = new FunctionUtils();
    doris_udf::FunctionContext* context = futil->get_fn_ctx();

    const int NUM_CONDS = 4;
    for (int i = -1; i < NUM_CONDS + 4; i++) {
        StringVal stringVal1;
        BigIntVal window(i);
        StringVal mode("default");
        // Register window and mode as the context's constant arguments before init.
        std::vector<doris_udf::AnyVal*> constant_args;
        constant_args.emplace_back(&window);
        constant_args.emplace_back(&mode);
        context->impl()->set_constant_args(std::move(constant_args));

        AggregateFunctions::window_funnel_init(context, &stringVal1);

        DateTimeVal timestamp;
        DateTimeValue time_value;
        // Adds one event on 2020-02-28 whose sixth set_time field is `sec` (presumably the
        // seconds component) with the given per-condition flags.
        auto add_event = [&](int sec, bool c0, bool c1, bool c2, bool c3) {
            time_value.set_time(2020, 2, 28, 0, 0, sec, 0);
            time_value.to_datetime_val(&timestamp);
            BooleanVal conds[NUM_CONDS] = {c0, c1, c2, c3};
            AggregateFunctions::window_funnel_update(context, window, mode, timestamp, NUM_CONDS,
                                                     conds, &stringVal1);
        };

        add_event(3, true, false, false, false);
        add_event(2, false, true, false, false);
        add_event(1, false, false, true, false);
        add_event(0, false, false, false, true);

        IntVal v = AggregateFunctions::window_funnel_finalize(context, stringVal1);
        LOG(INFO) << "event num: " << NUM_CONDS << " window: " << window.val;
        EXPECT_EQ(v.val, 1);
    }
    delete futil;
}
|
||||
|
||||
// Same events as testMax4ReverseSortedNoMerge, but the state is passed through
// serialize -> init -> merge before being finalized.
TEST_F(WindowFunnelTest, testMax4ReverseSortedMerge) {
    FunctionUtils* futil = new FunctionUtils();
    doris_udf::FunctionContext* context = futil->get_fn_ctx();

    const int NUM_CONDS = 4;
    for (int i = -1; i < NUM_CONDS + 4; i++) {
        StringVal stringVal1;
        BigIntVal window(i);
        StringVal mode("default");
        // Register window and mode as the context's constant arguments before init.
        std::vector<doris_udf::AnyVal*> constant_args;
        constant_args.emplace_back(&window);
        constant_args.emplace_back(&mode);
        context->impl()->set_constant_args(std::move(constant_args));

        AggregateFunctions::window_funnel_init(context, &stringVal1);

        DateTimeVal timestamp;
        DateTimeValue time_value;
        // Adds one event on 2020-02-28 whose sixth set_time field is `sec` (presumably the
        // seconds component) with the given per-condition flags.
        auto add_event = [&](int sec, bool c0, bool c1, bool c2, bool c3) {
            time_value.set_time(2020, 2, 28, 0, 0, sec, 0);
            time_value.to_datetime_val(&timestamp);
            BooleanVal conds[NUM_CONDS] = {c0, c1, c2, c3};
            AggregateFunctions::window_funnel_update(context, window, mode, timestamp, NUM_CONDS,
                                                     conds, &stringVal1);
        };

        add_event(3, true, false, false, false);
        add_event(2, false, true, false, false);
        add_event(1, false, false, true, false);
        add_event(0, false, false, false, true);

        StringVal s = AggregateFunctions::window_funnel_serialize(context, stringVal1);

        // Merge the serialized state into a freshly initialized one and finalize that.
        StringVal stringVal2;
        AggregateFunctions::window_funnel_init(context, &stringVal2);
        AggregateFunctions::window_funnel_merge(context, s, &stringVal2);
        IntVal v = AggregateFunctions::window_funnel_finalize(context, stringVal2);
        LOG(INFO) << "event num: " << NUM_CONDS << " window: " << window.val;
        EXPECT_EQ(v.val, 1);
    }
    delete futil;
}
|
||||
|
||||
// Feeds four time-ordered events in which condition 0 matches twice, condition 1 once
// and conditions 2/3 never, so the expected funnel depth is capped at 2.
// The state is finalized directly (no serialize/merge step).
TEST_F(WindowFunnelTest, testMax4DuplicateSortedNoMerge) {
    FunctionUtils* futil = new FunctionUtils();
    doris_udf::FunctionContext* context = futil->get_fn_ctx();

    const int NUM_CONDS = 4;
    for (int i = -1; i < NUM_CONDS + 4; i++) {
        StringVal stringVal1;
        BigIntVal window(i);
        StringVal mode("default");
        // Register window and mode as the context's constant arguments before init.
        std::vector<doris_udf::AnyVal*> constant_args;
        constant_args.emplace_back(&window);
        constant_args.emplace_back(&mode);
        context->impl()->set_constant_args(std::move(constant_args));

        AggregateFunctions::window_funnel_init(context, &stringVal1);

        DateTimeVal timestamp;
        DateTimeValue time_value;
        // Adds one event on 2020-02-28 whose sixth set_time field is `sec` (presumably the
        // seconds component) with the given per-condition flags.
        auto add_event = [&](int sec, bool c0, bool c1, bool c2, bool c3) {
            time_value.set_time(2020, 2, 28, 0, 0, sec, 0);
            time_value.to_datetime_val(&timestamp);
            BooleanVal conds[NUM_CONDS] = {c0, c1, c2, c3};
            AggregateFunctions::window_funnel_update(context, window, mode, timestamp, NUM_CONDS,
                                                     conds, &stringVal1);
        };

        add_event(0, true, false, false, false);
        add_event(1, false, true, false, false);
        add_event(2, true, false, false, false);
        add_event(3, false, false, false, false);

        IntVal v = AggregateFunctions::window_funnel_finalize(context, stringVal1);
        LOG(INFO) << "event num: " << NUM_CONDS << " window: " << window.val;
        EXPECT_EQ(v.val, i < 0 ? 1 : (i < 2 ? i + 1 : 2));
    }
    delete futil;
}
|
||||
|
||||
// Same events as testMax4DuplicateSortedNoMerge, but the state is passed through
// serialize -> init -> merge before being finalized.
TEST_F(WindowFunnelTest, testMax4DuplicateSortedMerge) {
    FunctionUtils* futil = new FunctionUtils();
    doris_udf::FunctionContext* context = futil->get_fn_ctx();

    const int NUM_CONDS = 4;
    for (int i = -1; i < NUM_CONDS + 4; i++) {
        StringVal stringVal1;
        BigIntVal window(i);
        StringVal mode("default");
        // Register window and mode as the context's constant arguments before init.
        std::vector<doris_udf::AnyVal*> constant_args;
        constant_args.emplace_back(&window);
        constant_args.emplace_back(&mode);
        context->impl()->set_constant_args(std::move(constant_args));

        AggregateFunctions::window_funnel_init(context, &stringVal1);

        DateTimeVal timestamp;
        DateTimeValue time_value;
        // Adds one event on 2020-02-28 whose sixth set_time field is `sec` (presumably the
        // seconds component) with the given per-condition flags.
        auto add_event = [&](int sec, bool c0, bool c1, bool c2, bool c3) {
            time_value.set_time(2020, 2, 28, 0, 0, sec, 0);
            time_value.to_datetime_val(&timestamp);
            BooleanVal conds[NUM_CONDS] = {c0, c1, c2, c3};
            AggregateFunctions::window_funnel_update(context, window, mode, timestamp, NUM_CONDS,
                                                     conds, &stringVal1);
        };

        add_event(0, true, false, false, false);
        add_event(1, false, true, false, false);
        add_event(2, true, false, false, false);
        add_event(3, false, false, false, false);

        StringVal s = AggregateFunctions::window_funnel_serialize(context, stringVal1);

        // Merge the serialized state into a freshly initialized one and finalize that.
        StringVal stringVal2;
        AggregateFunctions::window_funnel_init(context, &stringVal2);
        AggregateFunctions::window_funnel_merge(context, s, &stringVal2);
        IntVal v = AggregateFunctions::window_funnel_finalize(context, stringVal2);
        LOG(INFO) << "event num: " << NUM_CONDS << " window: " << window.val;
        EXPECT_EQ(v.val, i < 0 ? 1 : (i < 2 ? i + 1 : 2));
    }
    delete futil;
}
|
||||
|
||||
// Feeds a single event for which none of the four conditions holds and expects the
// finalized funnel depth to be 0.
TEST_F(WindowFunnelTest, testNoMatchedEvent) {
    FunctionUtils* futil = new FunctionUtils();
    doris_udf::FunctionContext* context = futil->get_fn_ctx();

    StringVal stringVal1;
    BigIntVal window(0);
    StringVal mode("default");
    // Register window and mode as the context's constant arguments before init.
    std::vector<doris_udf::AnyVal*> constant_args;
    constant_args.emplace_back(&window);
    constant_args.emplace_back(&mode);
    context->impl()->set_constant_args(std::move(constant_args));

    AggregateFunctions::window_funnel_init(context, &stringVal1);

    DateTimeVal timestamp;
    DateTimeValue time_value;
    time_value.set_time(2020, 2, 28, 0, 0, 0, 0);
    time_value.to_datetime_val(&timestamp);

    // No condition matches for this event.
    BooleanVal conds[4] = {false, false, false, false};
    AggregateFunctions::window_funnel_update(context, window, mode, timestamp, 4, conds,
                                             &stringVal1);

    IntVal v = AggregateFunctions::window_funnel_finalize(context, stringVal1);
    EXPECT_EQ(v.val, 0);
    delete futil;
}
|
||||
|
||||
// Finalizes two freshly initialized states that never received an update and expects a
// funnel depth of 0 from each.
TEST_F(WindowFunnelTest, testNoEvent) {
    auto* utils = new FunctionUtils();
    doris_udf::FunctionContext* ctx = utils->get_fn_ctx();

    StringVal first_state;
    BigIntVal window(0);
    StringVal mode("default");
    // Register window and mode as the context's constant arguments before init.
    std::vector<doris_udf::AnyVal*> const_args;
    const_args.emplace_back(&window);
    const_args.emplace_back(&mode);
    ctx->impl()->set_constant_args(std::move(const_args));

    AggregateFunctions::window_funnel_init(ctx, &first_state);
    IntVal depth = AggregateFunctions::window_funnel_finalize(ctx, first_state);
    EXPECT_EQ(depth.val, 0);

    // Repeat with a second, independent state under the same context.
    StringVal second_state;
    AggregateFunctions::window_funnel_init(ctx, &second_state);
    depth = AggregateFunctions::window_funnel_finalize(ctx, second_state);
    EXPECT_EQ(depth.val, 0);

    delete utils;
}
|
||||
|
||||
// Updates the state once with a null timestamp (and no matching condition) and expects
// the finalized funnel depth to be 0.
TEST_F(WindowFunnelTest, testInputNull) {
    auto* utils = new FunctionUtils();
    doris_udf::FunctionContext* ctx = utils->get_fn_ctx();

    BigIntVal window(0);
    StringVal mode("default");
    // Register window and mode as the context's constant arguments before init.
    std::vector<doris_udf::AnyVal*> const_args;
    const_args.emplace_back(&window);
    const_args.emplace_back(&mode);
    ctx->impl()->set_constant_args(std::move(const_args));

    StringVal state;
    AggregateFunctions::window_funnel_init(ctx, &state);

    DateTimeVal null_ts = DateTimeVal::null();
    BooleanVal flags[4] = {false, false, false, false};
    AggregateFunctions::window_funnel_update(ctx, window, mode, null_ts, 4, flags, &state);

    IntVal depth = AggregateFunctions::window_funnel_finalize(ctx, state);
    EXPECT_EQ(depth.val, 0);

    delete utils;
}
|
||||
|
||||
} // namespace doris
|
||||
@ -1,190 +0,0 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "olap/byte_buffer.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
#include "common/configbase.h"
|
||||
#include "olap/file_helper.h"
|
||||
|
||||
namespace doris {
|
||||
using namespace ErrorCode;
|
||||
|
||||
class TestByteBuffer : public testing::Test {
|
||||
public:
|
||||
virtual ~TestByteBuffer() {}
|
||||
virtual void SetUp() {}
|
||||
virtual void TearDown() {
|
||||
if (std::filesystem::exists(".test_byte_buffer")) {
|
||||
EXPECT_TRUE(std::filesystem::remove_all(".test_byte_buffer"));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// 测试基本的读写功能
|
||||
TEST_F(TestByteBuffer, TestReadWrite) {
|
||||
StorageByteBuffer* buf1 = nullptr;
|
||||
|
||||
buf1 = StorageByteBuffer::create(100);
|
||||
EXPECT_TRUE(buf1 != nullptr);
|
||||
|
||||
char in[10] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'};
|
||||
for (int i = 0; i < 5; i++) {
|
||||
EXPECT_EQ(Status::OK(), buf1->put(in, sizeof(in)));
|
||||
EXPECT_EQ(100u - (i + 1) * sizeof(in), buf1->remaining());
|
||||
EXPECT_EQ((i + 1) * sizeof(in), buf1->position());
|
||||
}
|
||||
|
||||
// 参数错误的指定写
|
||||
EXPECT_EQ(Status::Error<OUT_OF_BOUND>(), buf1->put(in, sizeof(in), 5, 10));
|
||||
|
||||
for (int i = 0; i < 50; i++) {
|
||||
EXPECT_EQ(Status::OK(), buf1->put(i));
|
||||
EXPECT_EQ(50u - (i + 1), buf1->remaining());
|
||||
EXPECT_EQ(50u + i + 1, buf1->position());
|
||||
}
|
||||
|
||||
// 再写就失败了
|
||||
EXPECT_EQ(Status::Error<BUFFER_OVERFLOW>(), buf1->put(0));
|
||||
EXPECT_EQ(Status::Error<BUFFER_OVERFLOW>(), buf1->put(in, sizeof(in)));
|
||||
|
||||
// 转为读模式
|
||||
buf1->flip();
|
||||
|
||||
for (int i = 0; i < 5; i++) {
|
||||
for (int j = 0; j < 10; j++) {
|
||||
char byte;
|
||||
EXPECT_EQ(Status::OK(), buf1->get(&byte));
|
||||
EXPECT_EQ(100u - (i * 10 + j + 1), buf1->remaining());
|
||||
EXPECT_EQ(i * 10 + j + 1, buf1->position());
|
||||
EXPECT_EQ('a' + j, byte);
|
||||
}
|
||||
}
|
||||
char buf[50];
|
||||
EXPECT_EQ(Status::Error<OUT_OF_BOUND>(), buf1->get(buf, 100));
|
||||
EXPECT_EQ(Status::Error<BUFFER_OVERFLOW>(), buf1->get(buf, 10, 50));
|
||||
EXPECT_EQ(Status::OK(), buf1->get(buf, sizeof(buf)));
|
||||
EXPECT_EQ(0u, buf1->remaining());
|
||||
EXPECT_EQ(100u, buf1->position());
|
||||
|
||||
for (int i = 0; i < 50; i++) {
|
||||
EXPECT_EQ(i, buf[i]);
|
||||
}
|
||||
char byte;
|
||||
EXPECT_EQ(Status::Error<OUT_OF_BOUND>(), buf1->get(&byte));
|
||||
EXPECT_EQ(Status::Error<OUT_OF_BOUND>(), buf1->get(&byte, 1));
|
||||
|
||||
EXPECT_EQ(Status::OK(), buf1->put(10, 'x'));
|
||||
EXPECT_EQ(Status::OK(), buf1->get(10, &byte));
|
||||
EXPECT_EQ('x', byte);
|
||||
|
||||
EXPECT_EQ(Status::OK(), buf1->set_limit(11));
|
||||
EXPECT_EQ(11u, buf1->limit());
|
||||
EXPECT_EQ(11u, buf1->position());
|
||||
EXPECT_EQ(Status::Error<INVALID_ARGUMENT>(), buf1->set_limit(101));
|
||||
EXPECT_EQ(Status::OK(), buf1->set_position(10));
|
||||
EXPECT_EQ(Status::OK(), buf1->get(&byte));
|
||||
EXPECT_EQ('x', byte);
|
||||
EXPECT_EQ(Status::Error<INVALID_ARGUMENT>(), buf1->set_position(12));
|
||||
|
||||
SAFE_DELETE(buf1);
|
||||
}
|
||||
|
||||
// 测试ByteBuffer对内存的引用, 尤其是智能指针的引用传递
|
||||
// 使用valgrind进行内存泄露检查
|
||||
TEST_F(TestByteBuffer, TestRef) {
|
||||
StorageByteBuffer* buf1 = nullptr;
|
||||
|
||||
buf1 = StorageByteBuffer::create(1000);
|
||||
EXPECT_TRUE(buf1 != nullptr);
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
EXPECT_EQ(Status::OK(), buf1->put(i));
|
||||
}
|
||||
StorageByteBuffer buf2 = *buf1;
|
||||
EXPECT_EQ(buf2.array(), buf1->array());
|
||||
StorageByteBuffer buf4(*buf1);
|
||||
EXPECT_EQ(buf2.array(), buf1->array());
|
||||
|
||||
StorageByteBuffer* buf3 = nullptr;
|
||||
buf3 = StorageByteBuffer::reference_buffer(buf1, 10, 90);
|
||||
|
||||
EXPECT_EQ(90u, buf3->capacity());
|
||||
EXPECT_EQ(90u, buf3->limit());
|
||||
EXPECT_EQ(0u, buf3->position());
|
||||
|
||||
for (int i = 0; i < 90; i++) {
|
||||
char byte;
|
||||
EXPECT_EQ(Status::OK(), buf3->get(&byte));
|
||||
EXPECT_EQ(i + 10, byte);
|
||||
}
|
||||
|
||||
EXPECT_EQ(4u, buf1->_buf.use_count());
|
||||
|
||||
SAFE_DELETE(buf1);
|
||||
SAFE_DELETE(buf3);
|
||||
EXPECT_EQ(2u, buf2._buf.use_count());
|
||||
}
|
||||
|
||||
// Tests a StorageByteBuffer backed by a memory-mapped file: data written to
// the file is read back through the buffer, and data written through the
// buffer is read back from the file.
TEST_F(TestByteBuffer, TestMmap) {
    FileHandler file_handle;
    std::string file_name = ".test_byte_buffer";
    Status res = file_handle.open_with_mode(file_name, O_CREAT | O_WRONLY, S_IRUSR | S_IWUSR);
    EXPECT_EQ(Status::OK(), res);

    // Fill the file with the byte values 0..99.
    char buf[100];
    for (int i = 0; i < 100; i++) {
        buf[i] = i;
    }
    EXPECT_EQ(Status::OK(), file_handle.write(buf, 100));
    file_handle.close();

    res = file_handle.open(file_name, O_RDWR);
    EXPECT_EQ(Status::OK(), res);
    // Request a shared, read/write mapping with length 80 and offset 0.
    StorageByteBuffer* buf1 = StorageByteBuffer::mmap(nullptr, 80, PROT_READ | PROT_WRITE,
                                                      MAP_SHARED, file_handle.fd(), 0);
    // Once mmap has completed, the original fd can be closed.
    file_handle.close();
    // Fatal assertion: every statement below dereferences buf1.
    ASSERT_TRUE(buf1 != nullptr);

    // The mapped bytes must match what was written to the file.
    for (int i = 0; i < 80; i++) {
        char byte;
        EXPECT_EQ(Status::OK(), buf1->get(&byte));
        EXPECT_EQ(i, byte);
    }

    // Test writing data through the mmap'ed buffer.
    EXPECT_EQ(Status::OK(), buf1->set_position(0));
    for (int i = 0; i < 10; i++) {
        EXPECT_EQ(Status::OK(), buf1->put('x'));
    }

    SAFE_DELETE(buf1);

    // Re-open the file and verify the first 10 bytes were overwritten.
    res = file_handle.open(file_name, O_RDONLY);
    EXPECT_EQ(Status::OK(), res);
    // Read from offset 0. (The original passed SEEK_SET, a whence constant
    // that merely happens to equal 0, in the offset position.)
    EXPECT_EQ(Status::OK(), file_handle.pread(buf, 10, 0));
    for (int i = 0; i < 10; i++) {
        EXPECT_EQ('x', buf[i]);
    }
    // NOTE(review): the scratch file ".test_byte_buffer" is not removed by
    // this test; consider cleaning it up in the fixture's TearDown.
}
|
||||
|
||||
} // namespace doris
|
||||
@ -24,7 +24,6 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "gen_cpp/Descriptors_types.h"
|
||||
#include "util/compress.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user