diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 1498f41488..6b150cfefc 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -23,9 +23,7 @@ #include "common/config.h" #include "common/logging.h" -#include "exprs/json_functions.h" #include "exprs/like_predicate.h" -#include "exprs/match_predicate.h" #include "exprs/math_functions.h" #include "exprs/string_functions.h" #include "geo/geo_functions.h" @@ -356,11 +354,7 @@ void Daemon::init(int argc, char** argv, const std::vector& paths) { MemInfo::init(); UserFunctionCache::instance()->init(config::user_function_dir); LikePredicate::init(); - StringFunctions::init(); - MathFunctions::init(); - JsonFunctions::init(); GeoFunctions::init(); - MatchPredicate::init(); LOG(INFO) << CpuInfo::debug_string(); LOG(INFO) << DiskInfo::debug_string(); diff --git a/be/src/exec/arrow/arrow_reader.h b/be/src/exec/arrow/arrow_reader.h index 7fce8f0925..b5e5aa22d6 100644 --- a/be/src/exec/arrow/arrow_reader.h +++ b/be/src/exec/arrow/arrow_reader.h @@ -44,7 +44,6 @@ class ExecEnv; class TBrokerRangeDesc; class TNetworkAddress; class RuntimeState; -class Tuple; class SlotDescriptor; class MemPool; class FileReader; @@ -84,10 +83,7 @@ public: virtual ~ArrowReaderWrap(); virtual Status init_reader(const TupleDescriptor* tuple_desc, const std::string& timezone) = 0; - // for row - virtual Status read(Tuple* tuple, MemPool* mem_pool, bool* eof) { - return Status::NotSupported("Not Implemented read"); - } + // for vec Status get_next_block(vectorized::Block* block, size_t* read_row, bool* eof) override; // This method should be deprecated once the old scanner is removed. diff --git a/be/src/exec/arrow/parquet_reader.cpp b/be/src/exec/arrow/parquet_reader.cpp index ba2fe9098f..0a376f2deb 100644 --- a/be/src/exec/arrow/parquet_reader.cpp +++ b/be/src/exec/arrow/parquet_reader.cpp @@ -118,32 +118,6 @@ Status ParquetReaderWrap::size(int64_t* size) { } } -// TODO: NEED TO REWRITE COMPLETELY. the way writing now is WRONG. -// StringRef shouldn't managing exclusive memory cause it will break RAII. -// besides, accessing object which is essentially const by non-const object -// is UB! -inline void ParquetReaderWrap::fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, - const uint8_t* value, int32_t len) { - tuple->set_not_null(slot_desc->null_indicator_offset()); - void* slot = tuple->get_slot(slot_desc->tuple_offset()); - StringRef* str_slot = reinterpret_cast(slot); - str_slot->data = reinterpret_cast(mem_pool->allocate(len)); - memcpy(const_cast(str_slot->data), value, len); // ! - str_slot->size = len; -} - -inline Status ParquetReaderWrap::set_field_null(Tuple* tuple, const SlotDescriptor* slot_desc) { - if (!slot_desc->is_nullable()) { - std::stringstream str_error; - str_error << "The field name(" << slot_desc->col_name() - << ") is not allowed null, but Parquet field is null."; - LOG(WARNING) << str_error.str(); - return Status::RuntimeError(str_error.str()); - } - tuple->set_null(slot_desc->null_indicator_offset()); - return Status::OK(); -} - Status ParquetReaderWrap::read_record_batch(bool* eof) { if (_current_line_of_group >= _rows_of_group) { // read next row group VLOG_DEBUG << "read_record_batch, current group id:" << _current_group @@ -233,288 +207,6 @@ Status ParquetReaderWrap::init_parquet_type() { return Status::OK(); } -Status ParquetReaderWrap::read(Tuple* tuple, MemPool* mem_pool, bool* eof) { - if (_batch == nullptr) { - _current_line_of_group += _rows_of_group; - return read_record_batch(eof); - } - uint8_t tmp_buf[128] = {0}; - int32_t wbytes = 0; - const uint8_t* value = nullptr; - int column_index = 0; - try { - size_t slots = _include_column_ids.size(); - for (size_t i = 0; i < slots; ++i) { - auto slot_desc = _file_slot_descs[i]; - column_index = i; // column index in batch record - switch (_parquet_column_type[i]) { - case arrow::Type::type::STRING: { - auto str_array = - std::static_pointer_cast(_batch->column(column_index)); - if (str_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - value = str_array->GetValue(_current_line_of_batch, &wbytes); - fill_slot(tuple, slot_desc, mem_pool, value, wbytes); - } - break; - } - case arrow::Type::type::INT32: { - auto int32_array = - std::static_pointer_cast(_batch->column(column_index)); - if (int32_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - int32_t value = int32_array->Value(_current_line_of_batch); - wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%d", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - case arrow::Type::type::INT64: { - auto int64_array = - std::static_pointer_cast(_batch->column(column_index)); - if (int64_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - int64_t value = int64_array->Value(_current_line_of_batch); - wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%" PRId64, value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - case arrow::Type::type::UINT32: { - auto uint32_array = - std::static_pointer_cast(_batch->column(column_index)); - if (uint32_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - uint32_t value = uint32_array->Value(_current_line_of_batch); - wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%u", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - case arrow::Type::type::UINT64: { - auto uint64_array = - std::static_pointer_cast(_batch->column(column_index)); - if (uint64_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - uint64_t value = uint64_array->Value(_current_line_of_batch); - wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%" PRIu64, value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - case arrow::Type::type::BINARY: { - auto str_array = - std::static_pointer_cast(_batch->column(column_index)); - if (str_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - value = str_array->GetValue(_current_line_of_batch, &wbytes); - fill_slot(tuple, slot_desc, mem_pool, value, wbytes); - } - break; - } - case arrow::Type::type::FIXED_SIZE_BINARY: { - auto fixed_array = std::static_pointer_cast( - _batch->column(column_index)); - if (fixed_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - std::string value = fixed_array->GetString(_current_line_of_batch); - fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)value.c_str(), value.length()); - } - break; - } - case arrow::Type::type::BOOL: { - auto boolean_array = - std::static_pointer_cast(_batch->column(column_index)); - if (boolean_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - bool value = boolean_array->Value(_current_line_of_batch); - if (value) { - fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)"true", 4); - } else { - fill_slot(tuple, slot_desc, mem_pool, (uint8_t*)"false", 5); - } - } - break; - } - case arrow::Type::type::UINT8: { - auto uint8_array = - std::static_pointer_cast(_batch->column(column_index)); - if (uint8_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - uint8_t value = uint8_array->Value(_current_line_of_batch); - wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%d", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - case arrow::Type::type::INT8: { - auto int8_array = - std::static_pointer_cast(_batch->column(column_index)); - if (int8_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - int8_t value = int8_array->Value(_current_line_of_batch); - wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%d", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - case arrow::Type::type::UINT16: { - auto uint16_array = - std::static_pointer_cast(_batch->column(column_index)); - if (uint16_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - uint16_t value = uint16_array->Value(_current_line_of_batch); - wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%d", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - case arrow::Type::type::INT16: { - auto int16_array = - std::static_pointer_cast(_batch->column(column_index)); - if (int16_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - int16_t value = int16_array->Value(_current_line_of_batch); - wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%d", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - case arrow::Type::type::HALF_FLOAT: { - auto half_float_array = std::static_pointer_cast( - _batch->column(column_index)); - if (half_float_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - float value = half_float_array->Value(_current_line_of_batch); - wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%f", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - case arrow::Type::type::FLOAT: { - auto float_array = - std::static_pointer_cast(_batch->column(column_index)); - if (float_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - float value = float_array->Value(_current_line_of_batch); - // Because the decimal type currently only supports (27, 9). - // Therefore, we use %.9f to give priority to the progress of the decimal type. - // Cannot use %f directly, this will cause 4000.9 to be converted to 4000.8999 - wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%.9f", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - case arrow::Type::type::DOUBLE: { - auto double_array = - std::static_pointer_cast(_batch->column(column_index)); - if (double_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - double value = double_array->Value(_current_line_of_batch); - wbytes = snprintf((char*)tmp_buf, sizeof(tmp_buf), "%.9f", value); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - case arrow::Type::type::TIMESTAMP: { - auto ts_array = std::static_pointer_cast( - _batch->column(column_index)); - if (ts_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - RETURN_IF_ERROR(handle_timestamp(ts_array, tmp_buf, - &wbytes)); // convert timestamp to string time - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - case arrow::Type::type::DECIMAL: { - auto decimal_array = - std::static_pointer_cast(_batch->column(column_index)); - if (decimal_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - std::string value = decimal_array->FormatValue(_current_line_of_batch); - fill_slot(tuple, slot_desc, mem_pool, (const uint8_t*)value.c_str(), - value.length()); - } - break; - } - case arrow::Type::type::DATE32: { - auto ts_array = - std::static_pointer_cast(_batch->column(column_index)); - if (ts_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - time_t timestamp = (time_t)((int64_t)ts_array->Value(_current_line_of_batch) * - 24 * 60 * 60); - struct tm local; - localtime_r(×tamp, &local); - char* to = reinterpret_cast(&tmp_buf); - wbytes = (uint32_t)strftime(to, 64, "%Y-%m-%d", &local); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - case arrow::Type::type::DATE64: { - auto ts_array = - std::static_pointer_cast(_batch->column(column_index)); - if (ts_array->IsNull(_current_line_of_batch)) { - RETURN_IF_ERROR(set_field_null(tuple, slot_desc)); - } else { - // convert milliseconds to seconds - time_t timestamp = - (time_t)((int64_t)ts_array->Value(_current_line_of_batch) / 1000); - struct tm local; - localtime_r(×tamp, &local); - char* to = reinterpret_cast(&tmp_buf); - wbytes = (uint32_t)strftime(to, 64, "%Y-%m-%d %H:%M:%S", &local); - fill_slot(tuple, slot_desc, mem_pool, tmp_buf, wbytes); - } - break; - } - default: { - // other type not support. - std::stringstream str_error; - str_error << "The field name(" << slot_desc->col_name() << "), type(" - << _parquet_column_type[i] - << ") not support. RowGroup: " << _current_group - << ", Row: " << _current_line_of_group - << ", ColumnIndex:" << column_index; - LOG(WARNING) << str_error.str(); - return Status::InternalError(str_error.str()); - } - } - } - } catch (parquet::ParquetException& e) { - std::stringstream str_error; - str_error << e.what() << " RowGroup:" << _current_group - << ", Row:" << _current_line_of_group << ", ColumnIndex " << column_index; - LOG(WARNING) << str_error.str(); - return Status::InternalError(str_error.str()); - } - - // update data value - ++_current_line_of_group; - ++_current_line_of_batch; - return read_record_batch(eof); -} - Status ParquetReaderWrap::read_next_batch() { std::unique_lock lock(_mtx); while (!_closed && _queue.empty()) { diff --git a/be/src/exec/arrow/parquet_reader.h b/be/src/exec/arrow/parquet_reader.h index 3d8bb0d36a..4de5b5167c 100644 --- a/be/src/exec/arrow/parquet_reader.h +++ b/be/src/exec/arrow/parquet_reader.h @@ -51,7 +51,6 @@ class ExecEnv; class TBrokerRangeDesc; class TNetworkAddress; class RuntimeState; -class Tuple; class SlotDescriptor; class MemPool; class FileReader; @@ -66,16 +65,11 @@ public: int64_t range_start_offset, int64_t range_size, bool case_sensitive = true); ~ParquetReaderWrap() override = default; - // Read - Status read(Tuple* tuple, MemPool* mem_pool, bool* eof) override; Status size(int64_t* size) override; Status init_reader(const TupleDescriptor* tuple_desc, const std::string& timezone) override; Status init_parquet_type(); private: - void fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, const uint8_t* value, - int32_t len); - Status set_field_null(Tuple* tuple, const SlotDescriptor* slot_desc); Status read_record_batch(bool* eof); Status handle_timestamp(const std::shared_ptr& ts_array, uint8_t* buf, int32_t* wbtyes); diff --git a/be/src/exec/base_scanner.h b/be/src/exec/base_scanner.h index 0be92f9437..8706c1b9c9 100644 --- a/be/src/exec/base_scanner.h +++ b/be/src/exec/base_scanner.h @@ -25,7 +25,6 @@ namespace doris { -class Tuple; class TupleDescriptor; class RowDescriptor; class RuntimeState; @@ -59,11 +58,6 @@ public: // Open this scanner, will initialize information need to virtual Status open(); - // Get next tuple - virtual Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) { - return Status::NotSupported("Not Implemented get block"); - } - // Get next block virtual Status get_next(vectorized::Block* block, bool* eof) { return Status::NotSupported("Not Implemented get block"); diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index 46eced5d08..6c6a5240f5 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -302,274 +302,6 @@ const std::string& ScrollParser::get_scroll_id() { return _scroll_id; } -Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple, - MemPool* tuple_pool, bool* line_eof, - const std::map& docvalue_context) { - *line_eof = true; - - if (_size <= 0 || _line_index >= _size) { - return Status::OK(); - } - - const rapidjson::Value& obj = _inner_hits_node[_line_index++]; - bool pure_doc_value = false; - if (obj.HasMember("fields")) { - pure_doc_value = true; - } - const rapidjson::Value& line = obj.HasMember(FIELD_SOURCE) ? obj[FIELD_SOURCE] : obj["fields"]; - - tuple->init(tuple_desc->byte_size()); - for (int i = 0; i < tuple_desc->slots().size(); ++i) { - const SlotDescriptor* slot_desc = tuple_desc->slots()[i]; - - if (!slot_desc->is_materialized()) { - continue; - } - // _id field must exist in every document, this is guaranteed by ES - // if _id was found in tuple, we would get `_id` value from inner-hit node - // json-format response would like below: - // "hits": { - // "hits": [ - // { - // "_id": "UhHNc3IB8XwmcbhBk1ES", - // "_source": { - // "k": 201, - // } - // } - // ] - // } - if (slot_desc->col_name() == FIELD_ID) { - // actually this branch will not be reached, this is guaranteed by Doris FE. - if (pure_doc_value) { - return Status::RuntimeError("obtain `_id` is not supported in doc_values mode"); - } - tuple->set_not_null(slot_desc->null_indicator_offset()); - void* slot = tuple->get_slot(slot_desc->tuple_offset()); - // obj[FIELD_ID] must not be nullptr - std::string _id = obj[FIELD_ID].GetString(); - size_t len = _id.length(); - char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(len)); - if (UNLIKELY(buffer == nullptr)) { - std::string details = strings::Substitute(ERROR_MEM_LIMIT_EXCEEDED, - "MaterializeNextRow", len, "string slot"); - RETURN_LIMIT_EXCEEDED(nullptr, details, len); - } - memcpy(buffer, _id.data(), len); - reinterpret_cast(slot)->data = buffer; - reinterpret_cast(slot)->size = len; - continue; - } - - // if pure_doc_value enabled, docvalue_context must contains the key - // todo: need move all `pure_docvalue` for every tuple outside fill_tuple - // should check pure_docvalue for one table scan not every tuple - const char* col_name = pure_doc_value ? docvalue_context.at(slot_desc->col_name()).c_str() - : slot_desc->col_name().c_str(); - - rapidjson::Value::ConstMemberIterator itr = line.FindMember(col_name); - if (itr == line.MemberEnd()) { - tuple->set_null(slot_desc->null_indicator_offset()); - continue; - } - - tuple->set_not_null(slot_desc->null_indicator_offset()); - const rapidjson::Value& col = line[col_name]; - - void* slot = tuple->get_slot(slot_desc->tuple_offset()); - PrimitiveType type = slot_desc->type().type; - - // when the column value is null, the subsequent type casting will report an error - if (col.IsNull()) { - slot = nullptr; - continue; - } - switch (type) { - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_STRING: { - // sometimes elasticsearch user post some not-string value to Elasticsearch Index. - // because of reading value from _source, we can not process all json type and then just transfer the value to original string representation - // this may be a tricky, but we can work around this issue - std::string val; - if (pure_doc_value) { - if (!col[0].IsString()) { - val = json_value_to_string(col[0]); - } else { - val = col[0].GetString(); - } - } else { - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); - if (!col.IsString()) { - val = json_value_to_string(col); - } else { - val = col.GetString(); - } - } - size_t val_size = val.length(); - char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size)); - if (UNLIKELY(buffer == nullptr)) { - std::string details = strings::Substitute( - ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", val_size, "string slot"); - RETURN_LIMIT_EXCEEDED(nullptr, details, val_size); - } - memcpy(buffer, val.data(), val_size); - reinterpret_cast(slot)->data = buffer; - reinterpret_cast(slot)->size = val_size; - break; - } - - case TYPE_TINYINT: { - Status status = get_int_value(col, type, slot, pure_doc_value); - if (!status.ok()) { - return status; - } - break; - } - - case TYPE_SMALLINT: { - Status status = get_int_value(col, type, slot, pure_doc_value); - if (!status.ok()) { - return status; - } - break; - } - - case TYPE_INT: { - Status status = get_int_value(col, type, slot, pure_doc_value); - if (!status.ok()) { - return status; - } - break; - } - - case TYPE_BIGINT: { - Status status = get_int_value(col, type, slot, pure_doc_value); - if (!status.ok()) { - return status; - } - break; - } - - case TYPE_LARGEINT: { - Status status = get_int_value<__int128>(col, type, slot, pure_doc_value); - if (!status.ok()) { - return status; - } - break; - } - - case TYPE_DOUBLE: { - Status status = get_float_value(col, type, slot, pure_doc_value); - if (!status.ok()) { - return status; - } - break; - } - - case TYPE_FLOAT: { - Status status = get_float_value(col, type, slot, pure_doc_value); - if (!status.ok()) { - return status; - } - break; - } - - case TYPE_BOOLEAN: { - if (col.IsBool()) { - *reinterpret_cast(slot) = col.GetBool(); - break; - } - - if (col.IsNumber()) { - *reinterpret_cast(slot) = col.GetInt(); - break; - } - - bool is_nested_str = false; - if (pure_doc_value && col.IsArray() && col[0].IsBool()) { - *reinterpret_cast(slot) = col[0].GetBool(); - break; - } else if (pure_doc_value && col.IsArray() && col[0].IsString()) { - is_nested_str = true; - } else if (pure_doc_value && col.IsArray()) { - return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); - } - - const rapidjson::Value& str_col = is_nested_str ? col[0] : col; - const std::string& val = str_col.GetString(); - size_t val_size = str_col.GetStringLength(); - StringParser::ParseResult result; - bool b = StringParser::string_to_bool(val.c_str(), val_size, &result); - RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type); - *reinterpret_cast(slot) = b; - break; - } - case TYPE_DECIMALV2: { - DecimalV2Value data; - - if (col.IsDouble()) { - data.assign_from_double(col.GetDouble()); - } else { - std::string val; - if (pure_doc_value) { - if (!col[0].IsString()) { - val = json_value_to_string(col[0]); - } else { - val = col[0].GetString(); - } - } else { - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); - if (!col.IsString()) { - val = json_value_to_string(col); - } else { - val = col.GetString(); - } - } - data.parse_from_str(val.data(), val.length()); - } - reinterpret_cast(slot)->set_value(data.value()); - break; - } - - case TYPE_DATE: - case TYPE_DATETIME: { - // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source - if (col.IsNumber()) { - // ES process date/datetime field would use millisecond timestamp for index or docvalue - // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms - // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds - RETURN_IF_ERROR(fill_date_slot_with_timestamp(slot, col, type)); - } else if (col.IsArray() && pure_doc_value) { - // this would happened just only when `enable_docvalue_scan = true` - // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose - // a standard date-format for date field as `2020-06-16T00:00:00.000Z` - // At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for - // date field's docvalue - if (col[0].IsString()) { - RETURN_IF_ERROR(fill_date_slot_with_strval(slot, col[0], type)); - break; - } - // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds - RETURN_IF_ERROR(fill_date_slot_with_timestamp(slot, col[0], type)); - } else { - // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); - RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); - RETURN_IF_ERROR(fill_date_slot_with_strval(slot, col, type)); - } - break; - } - default: { - DCHECK(false); - break; - } - } - } - - *line_eof = false; - return Status::OK(); -} - Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, std::vector& columns, MemPool* tuple_pool, bool* line_eof, diff --git a/be/src/exec/es/es_scroll_parser.h b/be/src/exec/es/es_scroll_parser.h index f3c7af70fc..40d421a924 100644 --- a/be/src/exec/es/es_scroll_parser.h +++ b/be/src/exec/es/es_scroll_parser.h @@ -34,8 +34,6 @@ public: ~ScrollParser(); Status parse(const std::string& scroll_result, bool exactly_once = false); - Status fill_tuple(const TupleDescriptor* _tuple_desc, Tuple* tuple, MemPool* mem_pool, - bool* line_eof, const std::map& docvalue_context); Status fill_columns(const TupleDescriptor* _tuple_desc, std::vector& columns, MemPool* mem_pool, bool* line_eof, const std::map& docvalue_context); diff --git a/be/src/exec/text_converter.h b/be/src/exec/text_converter.h index 69c27e2c29..b8aa8fb5ba 100644 --- a/be/src/exec/text_converter.h +++ b/be/src/exec/text_converter.h @@ -24,7 +24,6 @@ class MemPool; class SlotDescriptor; class Status; struct StringRef; -class Tuple; class TupleDescriptor; // Helper class for dealing with text data, e.g., converting text data to @@ -33,19 +32,6 @@ class TextConverter { public: TextConverter(char escape_char); - // Converts slot data, of length 'len', into type of slot_desc, - // and writes the result into the tuples's slot. - // copy_string indicates whether we need to make a separate copy of the string data: - // For regular unescaped strings, we point to the original data in the _file_buf. - // For regular escaped strings, we copy an its unescaped string into a separate buffer - // and point to it. - // If the string needs to be copied, the memory is allocated from 'pool', otherwise - // 'pool' is unused. - // Unsuccessful conversions are turned into NULLs. - // Returns true if the value was written successfully. - bool write_slot(const SlotDescriptor* slot_desc, Tuple* tuple, const char* data, int len, - bool copy_string, bool need_escape, MemPool* pool); - void write_string_column(const SlotDescriptor* slot_desc, vectorized::MutableColumnPtr* column_ptr, const char* data, size_t len); diff --git a/be/src/exec/text_converter.hpp b/be/src/exec/text_converter.hpp index b6add9183f..d25b19340f 100644 --- a/be/src/exec/text_converter.hpp +++ b/be/src/exec/text_converter.hpp @@ -36,135 +36,6 @@ namespace doris { -// Note: this function has a codegen'd version. Changing this function requires -// corresponding changes to CodegenWriteSlot. -inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, Tuple* tuple, - const char* data, int len, bool copy_string, bool need_escape, - MemPool* pool) { - //Small batch import only \N is considered to be NULL, there is no replace_value function for batch import - if (slot_desc->is_nullable()) { - if (len == 2 && data[0] == '\\' && data[1] == 'N') { - tuple->set_null(slot_desc->null_indicator_offset()); - return true; - } else { - tuple->set_not_null(slot_desc->null_indicator_offset()); - } - } - - StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; - void* slot = tuple->get_slot(slot_desc->tuple_offset()); - - // Parse the raw-text data. Translate the text string to internal format. - switch (slot_desc->type().type) { - case TYPE_HLL: - case TYPE_VARCHAR: - case TYPE_CHAR: - case TYPE_STRING: { - StringRef* str_slot = reinterpret_cast(slot); - str_slot->data = const_cast(data); - str_slot->size = len; - if (len != 0 && (copy_string || need_escape)) { - DCHECK(pool != nullptr); - char* slot_data = reinterpret_cast(pool->allocate(len)); - - if (need_escape) { - unescape_string(data, slot_data, &str_slot->size); - } else { - memcpy(slot_data, data, str_slot->size); - } - - str_slot->data = slot_data; - } - - break; - } - - case TYPE_BOOLEAN: - *reinterpret_cast(slot) = StringParser::string_to_bool(data, len, &parse_result); - break; - - case TYPE_TINYINT: - *reinterpret_cast(slot) = - StringParser::string_to_int(data, len, &parse_result); - break; - - case TYPE_SMALLINT: - *reinterpret_cast(slot) = - StringParser::string_to_int(data, len, &parse_result); - break; - - case TYPE_INT: - *reinterpret_cast(slot) = - StringParser::string_to_int(data, len, &parse_result); - break; - - case TYPE_BIGINT: - *reinterpret_cast(slot) = - StringParser::string_to_int(data, len, &parse_result); - break; - - case TYPE_LARGEINT: { - __int128 tmp = StringParser::string_to_int<__int128>(data, len, &parse_result); - memcpy(slot, &tmp, sizeof(tmp)); - break; - } - - case TYPE_FLOAT: - *reinterpret_cast(slot) = - StringParser::string_to_float(data, len, &parse_result); - break; - - case TYPE_DOUBLE: - *reinterpret_cast(slot) = - StringParser::string_to_float(data, len, &parse_result); - break; - - case TYPE_DATE: { - DateTimeValue* ts_slot = reinterpret_cast(slot); - if (!ts_slot->from_date_str(data, len)) { - parse_result = StringParser::PARSE_FAILURE; - break; - } - - ts_slot->cast_to_date(); - break; - } - - case TYPE_DATETIME: { - DateTimeValue* ts_slot = reinterpret_cast(slot); - if (!ts_slot->from_date_str(data, len)) { - parse_result = StringParser::PARSE_FAILURE; - } - - ts_slot->to_datetime(); - break; - } - - case TYPE_DECIMALV2: { - DecimalV2Value decimal_slot; - - if (decimal_slot.parse_from_str(data, len)) { - parse_result = StringParser::PARSE_FAILURE; - } - - *reinterpret_cast(slot) = decimal_slot.value(); - break; - } - - default: - DCHECK(false) << "bad slot type: " << slot_desc->type(); - break; - } - - // TODO: add warning for overflow case - if (parse_result != StringParser::PARSE_SUCCESS) { - tuple->set_null(slot_desc->null_indicator_offset()); - return false; - } - - return true; -} - inline void TextConverter::write_string_column(const SlotDescriptor* slot_desc, vectorized::MutableColumnPtr* column_ptr, const char* data, size_t len) { diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt index 7f18934f58..fb16f804e9 100644 --- a/be/src/exprs/CMakeLists.txt +++ b/be/src/exprs/CMakeLists.txt @@ -28,7 +28,6 @@ add_library(Exprs runtime_filter.cpp runtime_filter_rpc.cpp like_predicate.cpp - match_predicate.cpp math_functions.cpp rpc_fn_comm.cpp string_functions.cpp diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp index 98756a4d8b..8dea4f1775 100644 --- a/be/src/exprs/json_functions.cpp +++ b/be/src/exprs/json_functions.cpp @@ -44,114 +44,6 @@ namespace doris { // json path cannot contains: ", [, ] static const re2::RE2 JSON_PATTERN("^([^\\\"\\[\\]]*)(?:\\[([0-9]+|\\*)\\])?"); -void JsonFunctions::init() {} - -IntVal JsonFunctions::get_json_int(FunctionContext* context, const StringVal& json_str, - const StringVal& path) { - if (json_str.is_null || path.is_null) { - return IntVal::null(); - } - std::string_view json_string((char*)json_str.ptr, json_str.len); - std::string_view path_string((char*)path.ptr, path.len); - rapidjson::Document document; - rapidjson::Value* root = - get_json_object(context, json_string, path_string, JSON_FUN_INT, &document); - if (root != nullptr && root->IsInt()) { - return IntVal(root->GetInt()); - } else { - return IntVal::null(); - } -} - -StringVal JsonFunctions::get_json_string(FunctionContext* context, const StringVal& json_str, - const StringVal& path) { - if (json_str.is_null || path.is_null) { - return StringVal::null(); - } - - std::string_view json_string((char*)json_str.ptr, json_str.len); - std::string_view path_string((char*)path.ptr, path.len); - rapidjson::Document document; - rapidjson::Value* root = - get_json_object(context, json_string, path_string, JSON_FUN_STRING, &document); - if (root == nullptr || root->IsNull()) { - return StringVal::null(); - } else if (root->IsString()) { - return AnyValUtil::from_string_temp(context, root->GetString()); - } else { - rapidjson::StringBuffer buf; - rapidjson::Writer writer(buf); - root->Accept(writer); - return AnyValUtil::from_string_temp(context, std::string(buf.GetString())); - } -} - -DoubleVal JsonFunctions::get_json_double(FunctionContext* context, const StringVal& json_str, - const StringVal& path) { - if (json_str.is_null || path.is_null) { - return DoubleVal::null(); - } - std::string_view json_string((char*)json_str.ptr, json_str.len); - std::string_view path_string((char*)path.ptr, path.len); - rapidjson::Document document; - rapidjson::Value* root = - get_json_object(context, json_string, path_string, JSON_FUN_DOUBLE, &document); - if (root == nullptr || root->IsNull()) { - return DoubleVal::null(); - } else if (root->IsInt()) { - return DoubleVal(static_cast(root->GetInt())); - } else if (root->IsDouble()) { - return DoubleVal(root->GetDouble()); - } else { - return DoubleVal::null(); - } -} - -StringVal JsonFunctions::json_array(FunctionContext* context, int num_args, - const StringVal* json_str) { - if (json_str->is_null) { - return StringVal::null(); - } - rapidjson::Value array_obj(rapidjson::kArrayType); - rapidjson::Document document; - rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); - //flag: The number it contains represents the type of previous parameters - const StringVal& flag = json_str[num_args - 1]; - DCHECK_EQ(num_args - 1, flag.len); - for (int i = 0; i < num_args - 1; ++i) { - const StringVal& arg = json_str[i]; - rapidjson::Value val = parse_str_with_flag(arg, flag, i, allocator); - array_obj.PushBack(val, allocator); - } - rapidjson::StringBuffer buf; - rapidjson::Writer writer(buf); - array_obj.Accept(writer); - return AnyValUtil::from_string_temp(context, std::string(buf.GetString())); -} - -StringVal JsonFunctions::json_object(FunctionContext* context, int num_args, - const StringVal* json_str) { - if (json_str->is_null) { - return StringVal::null(); - } - rapidjson::Document document(rapidjson::kObjectType); - rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); - const StringVal& flag = json_str[num_args - 1]; - document.SetObject(); - DCHECK_EQ(num_args - 1, flag.len); - for (int i = 1; i < num_args - 1; i = i + 2) { - const StringVal& arg = json_str[i]; - rapidjson::Value key(rapidjson::kStringType); - key.SetString((char*)json_str[i - 1].ptr, json_str[i - 1].len, allocator); - rapidjson::Value val = parse_str_with_flag(arg, flag, i, allocator); - document.AddMember(key, val, allocator); - } - rapidjson::StringBuffer buf; - rapidjson::Writer writer(buf); - document.Accept(writer); - return AnyValUtil::from_string_temp(context, std::string(buf.GetString())); -} - rapidjson::Value JsonFunctions::parse_str_with_flag(const StringVal& arg, const StringVal& flag, const int num, rapidjson::Document::AllocatorType& allocator) { @@ -185,17 +77,6 @@ rapidjson::Value JsonFunctions::parse_str_with_flag(const StringVal& arg, const } return val; } -StringVal JsonFunctions::json_quote(FunctionContext* context, const StringVal& json_str) { - if (json_str.is_null) { - return StringVal::null(); - } - rapidjson::Value array_obj(rapidjson::kObjectType); - array_obj.SetString(rapidjson::StringRef((char*)json_str.ptr, json_str.len)); - rapidjson::StringBuffer buf; - rapidjson::Writer writer(buf); - array_obj.Accept(writer); - return AnyValUtil::from_string_temp(context, std::string(buf.GetString())); -} rapidjson::Value* JsonFunctions::match_value(const std::vector& parsed_paths, rapidjson::Value* document, @@ -298,74 +179,6 @@ rapidjson::Value* JsonFunctions::match_value(const std::vector& parsed return root; } -rapidjson::Value* JsonFunctions::get_json_object(FunctionContext* context, - std::string_view json_string, - std::string_view path_string, - const JsonFunctionType& fntype, - rapidjson::Document* document) { - // split path by ".", and escape quota by "\" - // eg: - // '$.text#abc.xyz' -> [$, text#abc, xyz] - // '$."text.abc".xyz' -> [$, text.abc, xyz] - // '$."text.abc"[1].xyz' -> [$, text.abc[1], xyz] - JsonState* json_state = nullptr; - JsonState tmp_json_state; - -#ifndef BE_TEST - json_state = reinterpret_cast( - context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); - if (json_state == nullptr) { - json_state = &tmp_json_state; - } - - if (json_state->json_paths.size() == 0) { -#ifdef USE_LIBCPP - std::string s(path_string); - auto tok = get_json_token(s); -#else - auto tok = get_json_token(path_string); -#endif - std::vector paths(tok.begin(), tok.end()); - get_parsed_paths(paths, &json_state->json_paths); - } -#else - json_state = &tmp_json_state; - std::string s(path_string); - auto tok = get_json_token(s); - std::vector paths(tok.begin(), tok.end()); - get_parsed_paths(paths, &json_state->json_paths); -#endif - - VLOG_TRACE << "first parsed path: " << json_state->json_paths[0].debug_string(); - - if (!json_state->json_paths[0].is_valid) { - return document; - } - - if (UNLIKELY(json_state->json_paths.size() == 1)) { - if (fntype == JSON_FUN_STRING) { - document->SetString(json_string.data(), json_string.length(), document->GetAllocator()); - } else { - return document; - } - } - - if (!json_state->document.IsNull()) { - document = &json_state->document; - } else { - document->Parse(json_string.data(), json_string.length()); - //rapidjson::Document document; - if (UNLIKELY(document->HasParseError())) { - VLOG_CRITICAL << "Error at offset " << document->GetErrorOffset() << ": " - << GetParseError_En(document->GetParseError()); - document->SetNull(); - return document; - } - } - - return match_value(json_state->json_paths, document, document->GetAllocator()); -} - rapidjson::Value* JsonFunctions::get_json_array_from_parsed_json( const std::string& json_path, rapidjson::Value* document, rapidjson::Document::AllocatorType& mem_allocator, bool* wrap_explicitly) { @@ -426,49 +239,6 @@ rapidjson::Value* JsonFunctions::get_json_object_from_parsed_json( return root; } -void JsonFunctions::json_path_prepare(doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope) { - if (scope != FunctionContext::FRAGMENT_LOCAL) { - return; - } - - if (!context->is_arg_constant(0) && !context->is_arg_constant(1)) { - return; - } - - JsonState* json_state = new JsonState; - - StringVal* json_str = reinterpret_cast(context->get_constant_arg(0)); - if (json_str != nullptr && !json_str->is_null) { - std::string json_string((char*)json_str->ptr, json_str->len); - json_state->document.Parse(json_string.c_str()); - } - StringVal* path = reinterpret_cast(context->get_constant_arg(1)); - if (path != nullptr && !path->is_null) { - std::string path_str(reinterpret_cast(path->ptr), path->len); - boost::tokenizer> tok( - path_str, boost::escaped_list_separator("\\", ".", "\"")); - std::vector path_exprs(tok.begin(), tok.end()); - get_parsed_paths(path_exprs, &json_state->json_paths); - } - - context->set_function_state(scope, json_state); - VLOG_TRACE << "prepare json path. size: " << json_state->json_paths.size(); -} - -void JsonFunctions::json_path_close(doris_udf::FunctionContext* context, - doris_udf::FunctionContext::FunctionStateScope scope) { - if (scope != FunctionContext::FRAGMENT_LOCAL) { - return; - } - - JsonState* json_state = reinterpret_cast(context->get_function_state(scope)); - if (json_state != nullptr) { - delete json_state; - VLOG_TRACE << "close json state"; - } -} - void JsonFunctions::parse_json_paths(const std::string& path_string, std::vector* parsed_paths) { // split path by ".", and escape quota by "\" diff --git a/be/src/exprs/json_functions.h b/be/src/exprs/json_functions.h index 61b3e8d0db..71dcf8ec55 100644 --- a/be/src/exprs/json_functions.h +++ b/be/src/exprs/json_functions.h @@ -88,36 +88,8 @@ struct JsonPath { } }; -struct JsonState { - std::vector json_paths; - rapidjson::Document document; -}; - class JsonFunctions { public: - static void init(); - static doris_udf::IntVal get_json_int(doris_udf::FunctionContext* context, - const doris_udf::StringVal& json_str, - const doris_udf::StringVal& path); - static doris_udf::StringVal get_json_string(doris_udf::FunctionContext* context, - const doris_udf::StringVal& json_str, - const doris_udf::StringVal& path); - static doris_udf::DoubleVal get_json_double(doris_udf::FunctionContext* context, - const doris_udf::StringVal& json_str, - const doris_udf::StringVal& path); - - static rapidjson::Value* get_json_object(FunctionContext* context, std::string_view json_string, - std::string_view path_string, - const JsonFunctionType& fntype, - rapidjson::Document* document); - - static doris_udf::StringVal json_array(doris_udf::FunctionContext* context, int num_args, - const doris_udf::StringVal* json_str); - static doris_udf::StringVal json_object(doris_udf::FunctionContext* context, int num_args, - const doris_udf::StringVal* json_str); - static doris_udf::StringVal json_quote(doris_udf::FunctionContext* context, - const doris_udf::StringVal& json_str); - /** * The `document` parameter must be has parsed. * return Value Is Array object @@ -137,12 +109,6 @@ public: const std::vector& parsed_paths, rapidjson::Value* document, rapidjson::Document::AllocatorType& mem_allocator); - static void json_path_prepare(doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope); - - static void json_path_close(doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope); - static void parse_json_paths(const std::string& path_strings, std::vector* parsed_paths); diff --git a/be/src/exprs/math_functions.cpp b/be/src/exprs/math_functions.cpp index 6eacf5fe6e..94de1353e6 100644 --- a/be/src/exprs/math_functions.cpp +++ b/be/src/exprs/math_functions.cpp @@ -100,414 +100,6 @@ double MathFunctions::my_double_round(double value, int64_t dec, bool dec_unsign return tmp2; } -void MathFunctions::init() {} - -DoubleVal MathFunctions::pi(FunctionContext* ctx) { - return DoubleVal(M_PI); -} - -DoubleVal MathFunctions::e(FunctionContext* ctx) { - return DoubleVal(M_E); -} - -// libc++ did not have std::abs for int128 -__int128_t largeint_abs(__int128_t x) { - return x > 0 ? x : -x; -} - -DecimalV2Val MathFunctions::abs(FunctionContext* ctx, const doris_udf::DecimalV2Val& val) { - if (val.is_null) { - return DecimalV2Val::null(); - } - if (UNLIKELY(val.val == MIN_INT128)) { - return DecimalV2Val::null(); - } else { - return DecimalV2Val(largeint_abs(val.val)); - } -} - -LargeIntVal MathFunctions::abs(FunctionContext* ctx, const doris_udf::LargeIntVal& val) { - if (val.is_null) { - return LargeIntVal::null(); - } - if (UNLIKELY(val.val == MIN_INT128)) { - return LargeIntVal::null(); - } else { - return LargeIntVal(largeint_abs(val.val)); - } -} - -LargeIntVal MathFunctions::abs(FunctionContext* ctx, const doris_udf::BigIntVal& val) { - if (val.is_null) { - return LargeIntVal::null(); - } - return LargeIntVal(largeint_abs(__int128(val.val))); -} - -BigIntVal MathFunctions::abs(FunctionContext* ctx, const doris_udf::IntVal& val) { - if (val.is_null) { - return BigIntVal::null(); - } - return BigIntVal(std::abs(int64_t(val.val))); -} - -IntVal MathFunctions::abs(FunctionContext* ctx, const doris_udf::SmallIntVal& val) { - if (val.is_null) { - return IntVal::null(); - } - return IntVal(std::abs(int32_t(val.val))); -} - -SmallIntVal MathFunctions::abs(FunctionContext* ctx, const doris_udf::TinyIntVal& val) { - if (val.is_null) { - return SmallIntVal::null(); - } - return SmallIntVal(std::abs(int16_t(val.val))); -} - -#define LOG_MATH_FN(NAME, RET_TYPE, INPUT_TYPE, FN) \ - RET_TYPE MathFunctions::NAME(FunctionContext* ctx, const INPUT_TYPE& v) { \ - if (v.is_null || v.val <= 0) return RET_TYPE::null(); \ - return RET_TYPE(FN(v.val)); \ - } - -// Generates a UDF that always calls FN() on the input val and returns it. -#define ONE_ARG_MATH_FN(NAME, RET_TYPE, INPUT_TYPE, FN) \ - RET_TYPE MathFunctions::NAME(FunctionContext* ctx, const INPUT_TYPE& v) { \ - if (v.is_null) return RET_TYPE::null(); \ - return RET_TYPE(FN(v.val)); \ - } - -ONE_ARG_MATH_FN(abs, DoubleVal, DoubleVal, std::fabs); -ONE_ARG_MATH_FN(abs, FloatVal, FloatVal, std::fabs); -ONE_ARG_MATH_FN(sin, DoubleVal, DoubleVal, std::sin); -ONE_ARG_MATH_FN(asin, DoubleVal, DoubleVal, std::asin); -ONE_ARG_MATH_FN(cos, DoubleVal, DoubleVal, std::cos); -ONE_ARG_MATH_FN(acos, DoubleVal, DoubleVal, std::acos); -ONE_ARG_MATH_FN(tan, DoubleVal, DoubleVal, std::tan); -ONE_ARG_MATH_FN(atan, DoubleVal, DoubleVal, std::atan); -ONE_ARG_MATH_FN(sqrt, DoubleVal, DoubleVal, std::sqrt); -ONE_ARG_MATH_FN(cbrt, DoubleVal, DoubleVal, std::cbrt); -ONE_ARG_MATH_FN(ceil, BigIntVal, DoubleVal, std::ceil); -ONE_ARG_MATH_FN(floor, BigIntVal, DoubleVal, std::floor); -ONE_ARG_MATH_FN(exp, DoubleVal, DoubleVal, std::exp); -LOG_MATH_FN(ln, DoubleVal, DoubleVal, std::log); -LOG_MATH_FN(log10, DoubleVal, DoubleVal, std::log10); - -TinyIntVal MathFunctions::sign(FunctionContext* ctx, const DoubleVal& v) { - if (v.is_null) { - return TinyIntVal::null(); - } - return TinyIntVal((v.val > 0) ? 1 : ((v.val < 0) ? -1 : 0)); -} - -DoubleVal MathFunctions::radians(FunctionContext* ctx, const DoubleVal& v) { - if (v.is_null) { - return v; - } - return DoubleVal(v.val * M_PI / 180.0); -} - -DoubleVal MathFunctions::degrees(FunctionContext* ctx, const DoubleVal& v) { - if (v.is_null) { - return v; - } - return DoubleVal(v.val * 180.0 / M_PI); -} - -BigIntVal MathFunctions::round(FunctionContext* ctx, const DoubleVal& v) { - if (v.is_null) { - return BigIntVal::null(); - } - return BigIntVal(static_cast(v.val + ((v.val < 0) ? -0.5 : 0.5))); -} - -BigIntVal MathFunctions::round_bankers(FunctionContext* ctx, const DoubleVal& v) { - return BigIntVal(static_cast(round_bankers(ctx, v, IntVal(0)).val)); -} - -DoubleVal MathFunctions::round_bankers(doris_udf::FunctionContext* ctx, const DoubleVal& v, - const IntVal& d) { - const double TOLERANCE = 1e-10; - double shift = std::pow(10, d.val); - double t = v.val * shift; - double rounded = std::round(t); - if (int64_t(rounded) % 2 == 1) { - if (::abs(rounded - t) - 0.5 < TOLERANCE) { - rounded -= 1; - } else { - rounded += 1; - } - } - return DoubleVal(rounded / shift); -} - -DoubleVal MathFunctions::round_up_to(FunctionContext* ctx, const DoubleVal& v, - const IntVal& scale) { - if (v.is_null || scale.is_null) { - return DoubleVal::null(); - } - return DoubleVal(my_double_round(v.val, scale.val, false, false)); -} - -DoubleVal MathFunctions::truncate(FunctionContext* ctx, const DoubleVal& v, const IntVal& scale) { - if (v.is_null || scale.is_null) { - return DoubleVal::null(); - } - return DoubleVal(my_double_round(v.val, scale.val, false, true)); -} - -DoubleVal MathFunctions::log2(FunctionContext* ctx, const DoubleVal& v) { - if (v.is_null || v.val <= 0.0) { - return DoubleVal::null(); - } - return DoubleVal(std::log(v.val) / std::log(2.0)); -} - -const double EPSILON = 1e-9; -DoubleVal MathFunctions::log(FunctionContext* ctx, const DoubleVal& base, const DoubleVal& v) { - if (base.is_null || v.is_null) { - return DoubleVal::null(); - } - if (base.val <= 0 || std::fabs(base.val - 1.0) < EPSILON || v.val <= 0.0) { - return DoubleVal::null(); - } - - return DoubleVal(std::log(v.val) / std::log(base.val)); -} - -DoubleVal MathFunctions::pow(FunctionContext* ctx, const DoubleVal& base, const DoubleVal& exp) { - if (base.is_null || exp.is_null) { - return DoubleVal::null(); - } - return DoubleVal(std::pow(base.val, exp.val)); -} - -void MathFunctions::rand_prepare(FunctionContext* ctx, FunctionContext::FunctionStateScope scope) { - std::mt19937* generator = reinterpret_cast(ctx->allocate(sizeof(std::mt19937))); - if (UNLIKELY(generator == nullptr)) { - LOG(ERROR) << "allocate random seed generator failed."; - return; - } - ctx->set_function_state(scope, generator); - new (generator) std::mt19937(); - if (scope == FunctionContext::THREAD_LOCAL) { - if (ctx->get_num_args() == 1) { - uint32_t seed = 0; - // This is a call to RandSeed, initialize the seed - // TODO: should we support non-constant seed? - if (!ctx->is_arg_constant(0)) { - ctx->set_error("Seed argument to rand() must be constant"); - return; - } - BigIntVal* seed_arg = static_cast(ctx->get_constant_arg(0)); - if (!seed_arg->is_null) { - seed = seed_arg->val; - } - generator->seed(seed); - } else { - generator->seed(std::random_device()()); - } - } -} - -DoubleVal MathFunctions::rand(FunctionContext* ctx) { - std::mt19937* generator = - reinterpret_cast(ctx->get_function_state(FunctionContext::THREAD_LOCAL)); - DCHECK(generator != nullptr); - static const double min = 0.0; - static const double max = 1.0; - std::uniform_real_distribution distribution(min, max); - return DoubleVal(distribution(*generator)); -} - -DoubleVal MathFunctions::rand_seed(FunctionContext* ctx, const BigIntVal& seed) { - if (seed.is_null) { - return DoubleVal::null(); - } - return rand(ctx); -} - -void MathFunctions::rand_close(FunctionContext* ctx, FunctionContext::FunctionStateScope scope) { - if (scope == FunctionContext::THREAD_LOCAL) { - uint8_t* generator = - reinterpret_cast(ctx->get_function_state(FunctionContext::THREAD_LOCAL)); - ctx->free(generator); - ctx->set_function_state(FunctionContext::THREAD_LOCAL, nullptr); - } -} - -StringVal MathFunctions::bin(FunctionContext* ctx, const BigIntVal& v) { - if (v.is_null) { - return StringVal::null(); - } - // Cast to an unsigned integer because it is compiler dependent - // whether the sign bit will be shifted like a regular bit. - // (logical vs. arithmetic shift for signed numbers) - uint64_t n = static_cast(v.val); - const size_t max_bits = sizeof(uint64_t) * 8; - char result[max_bits]; - uint32_t index = max_bits; - do { - result[--index] = '0' + (n & 1); - } while (n >>= 1); - return AnyValUtil::from_buffer_temp(ctx, result + index, max_bits - index); -} - -StringVal MathFunctions::hex_int(FunctionContext* ctx, const BigIntVal& v) { - if (v.is_null) { - return StringVal::null(); - } - - uint64_t num = v.val; - if (num == 0) { - return AnyValUtil::from_string_temp(ctx, "0"); - } - char hex[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; - // uint64_t max value 0xFFFFFFFFFFFFFFFF , 16 'F' - // need 1 more space for '\0' - char ans[17]; - int i = 0; - while (num) { - ans[i++] = hex[num & 15]; - num = num >> 4; - } - ans[i] = '\0'; - // reverse - for (int k = 0, j = i - 1; k <= j; k++, j--) { - char tmp = ans[j]; - ans[j] = ans[k]; - ans[k] = tmp; - } - return AnyValUtil::from_string_temp(ctx, ans); -} - -StringVal MathFunctions::hex_string(FunctionContext* ctx, const StringVal& s) { - if (s.is_null) { - return StringVal::null(); - } - - StringVal result = StringVal::create_temp_string_val(ctx, s.len * 2); - simd::VStringFunctions::hex_encode(s.ptr, s.len, reinterpret_cast(result.ptr)); - return result; -} - -StringVal MathFunctions::unhex(FunctionContext* ctx, const StringVal& s) { - if (s.is_null) { - return StringVal::null(); - } - // For odd number of chars return empty string like Hive does. - if (s.len & 1) { - return StringVal(); - } - - int result_len = s.len / 2; - StringVal result_string_val(ctx, result_len); - char* result = reinterpret_cast(result_string_val.ptr); - int res_index = 0; - int s_index = 0; - while (s_index < s.len) { - char c = 0; - for (int j = 0; j < 2; ++j, ++s_index) { - switch (s.ptr[s_index]) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - c += (s.ptr[s_index] - '0') * ((j == 0) ? 16 : 1); - break; - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - // Map to decimal values [10, 15] - c += (s.ptr[s_index] - 'A' + 10) * ((j == 0) ? 16 : 1); - break; - case 'a': - case 'b': - case 'c': - case 'd': - case 'e': - case 'f': - // Map to decimal [10, 15] - c += (s.ptr[s_index] - 'a' + 10) * ((j == 0) ? 16 : 1); - break; - default: - // Character not in hex alphabet, return empty string. - return StringVal(); - } - } - result[res_index] = c; - ++res_index; - } - return result_string_val; -} - -StringVal MathFunctions::conv_int(FunctionContext* ctx, const BigIntVal& num, - const TinyIntVal& src_base, const TinyIntVal& dest_base) { - if (num.is_null || src_base.is_null || dest_base.is_null) { - return StringVal::null(); - } - // As in MySQL and Hive, min base is 2 and max base is 36. - // (36 is max base representable by alphanumeric chars) - // If a negative target base is given, num should be interpreted in 2's complement. - if (std::abs(src_base.val) < MIN_BASE || std::abs(src_base.val) > MAX_BASE || - std::abs(dest_base.val) < MIN_BASE || std::abs(dest_base.val) > MAX_BASE) { - // Return nullptr like Hive does. - return StringVal::null(); - } - // Invalid input. - if (src_base.val < 0 && num.val >= 0) { - return StringVal::null(); - } - int64_t decimal_num = num.val; - if (src_base.val != 10) { - // Convert src_num representing a number in src_base but encoded in decimal - // into its actual decimal number. - if (!decimal_in_base_to_decimal(num.val, src_base.val, &decimal_num)) { - // Handle overflow, setting decimal_num appropriately. - handle_parse_result(dest_base.val, &decimal_num, StringParser::PARSE_OVERFLOW); - } - } - return decimal_to_base(ctx, decimal_num, dest_base.val); -} - -StringVal MathFunctions::conv_string(FunctionContext* ctx, const StringVal& num_str, - const TinyIntVal& src_base, const TinyIntVal& dest_base) { - if (num_str.is_null || src_base.is_null || dest_base.is_null) { - return StringVal::null(); - } - // As in MySQL and Hive, min base is 2 and max base is 36. - // (36 is max base representable by alphanumeric chars) - // If a negative target base is given, num should be interpreted in 2's complement. - if (std::abs(src_base.val) < MIN_BASE || std::abs(src_base.val) > MAX_BASE || - std::abs(dest_base.val) < MIN_BASE || std::abs(dest_base.val) > MAX_BASE) { - // Return nullptr like Hive does. - return StringVal::null(); - } - // Convert digits in num_str in src_base to decimal. - StringParser::ParseResult parse_res; - int64_t decimal_num = StringParser::string_to_int( - reinterpret_cast(num_str.ptr), num_str.len, src_base.val, &parse_res); - if (src_base.val < 0 && decimal_num >= 0) { - // Invalid input. - return StringVal::null(); - } - if (!handle_parse_result(dest_base.val, &decimal_num, parse_res)) { - // Return 0 for invalid input strings like Hive does. - return StringVal(reinterpret_cast(const_cast("0")), 1); - } - return decimal_to_base(ctx, decimal_num, dest_base.val); -} - StringVal MathFunctions::decimal_to_base(FunctionContext* ctx, int64_t src_num, int8_t dest_base) { // Max number of digits of any base (base 2 gives max digits), plus sign. const size_t max_digits = sizeof(uint64_t) * 8 + 1; @@ -579,153 +171,4 @@ bool MathFunctions::handle_parse_result(int8_t dest_base, int64_t* num, return true; } -BigIntVal MathFunctions::pmod_bigint(FunctionContext* ctx, const BigIntVal& a, const BigIntVal& b) { - if (a.is_null || b.is_null) { - return BigIntVal::null(); - } - return BigIntVal(((a.val % b.val) + b.val) % b.val); -} - -DoubleVal MathFunctions::pmod_double(FunctionContext* ctx, const DoubleVal& a, const DoubleVal& b) { - if (a.is_null || b.is_null) { - return DoubleVal::null(); - } - return DoubleVal(fmod(fmod(a.val, b.val) + b.val, b.val)); -} - -FloatVal MathFunctions::fmod_float(FunctionContext* ctx, const FloatVal& a, const FloatVal& b) { - if (a.is_null || b.is_null || b.val == 0) { - return FloatVal::null(); - } - return FloatVal(fmodf(a.val, b.val)); -} - -DoubleVal MathFunctions::fmod_double(FunctionContext* ctx, const DoubleVal& a, const DoubleVal& b) { - if (a.is_null || b.is_null || b.val == 0) { - return DoubleVal::null(); - } - return DoubleVal(fmod(a.val, b.val)); -} - -BigIntVal MathFunctions::positive_bigint(FunctionContext* ctx, const BigIntVal& val) { - return val; -} - -DoubleVal MathFunctions::positive_double(FunctionContext* ctx, const DoubleVal& val) { - return val; -} - -DecimalV2Val MathFunctions::positive_decimal(FunctionContext* ctx, const DecimalV2Val& val) { - return val; -} - -BigIntVal MathFunctions::negative_bigint(FunctionContext* ctx, const BigIntVal& val) { - if (val.is_null) { - return val; - } - return BigIntVal(-val.val); -} - -DoubleVal MathFunctions::negative_double(FunctionContext* ctx, const DoubleVal& val) { - if (val.is_null) { - return val; - } - return DoubleVal(-val.val); -} - -DecimalV2Val MathFunctions::negative_decimal(FunctionContext* ctx, const DecimalV2Val& val) { - if (val.is_null) { - return val; - } - const DecimalV2Value& dv1 = DecimalV2Value::from_decimal_val(val); - DecimalV2Val result; - (-dv1).to_decimal_val(&result); - return result; -} - -#define LEAST_FN(TYPE) \ - TYPE MathFunctions::least(FunctionContext* ctx, int num_args, const TYPE* args) { \ - if (args[0].is_null) return TYPE::null(); \ - int result_idx = 0; \ - for (int i = 1; i < num_args; ++i) { \ - if (args[i].is_null) return TYPE::null(); \ - if (args[i].val < args[result_idx].val) result_idx = i; \ - } \ - return TYPE(args[result_idx].val); \ - } - -#define LEAST_FNS() \ - LEAST_FN(TinyIntVal); \ - LEAST_FN(SmallIntVal); \ - LEAST_FN(IntVal); \ - LEAST_FN(BigIntVal); \ - LEAST_FN(LargeIntVal); \ - LEAST_FN(FloatVal); \ - LEAST_FN(DoubleVal); - -LEAST_FNS(); - -#define LEAST_NONNUMERIC_FN(TYPE_NAME, TYPE, DORIS_TYPE) \ - TYPE MathFunctions::least(FunctionContext* ctx, int num_args, const TYPE* args) { \ - if (args[0].is_null) return TYPE::null(); \ - DORIS_TYPE result_val = DORIS_TYPE::from_##TYPE_NAME(args[0]); \ - for (int i = 1; i < num_args; ++i) { \ - if (args[i].is_null) return TYPE::null(); \ - DORIS_TYPE val = DORIS_TYPE::from_##TYPE_NAME(args[i]); \ - if (val < result_val) result_val = val; \ - } \ - TYPE result; \ - result_val.to_##TYPE_NAME(&result); \ - return result; \ - } - -#define LEAST_NONNUMERIC_FNS() \ - LEAST_NONNUMERIC_FN(string_val, StringVal, StringRef); \ - LEAST_NONNUMERIC_FN(datetime_val, DateTimeVal, DateTimeValue); \ - LEAST_NONNUMERIC_FN(decimal_val, DecimalV2Val, DecimalV2Value); - -LEAST_NONNUMERIC_FNS(); - -#define GREATEST_FN(TYPE) \ - TYPE MathFunctions::greatest(FunctionContext* ctx, int num_args, const TYPE* args) { \ - if (args[0].is_null) return TYPE::null(); \ - int result_idx = 0; \ - for (int i = 1; i < num_args; ++i) { \ - if (args[i].is_null) return TYPE::null(); \ - if (args[i].val > args[result_idx].val) result_idx = i; \ - } \ - return TYPE(args[result_idx].val); \ - } - -#define GREATEST_FNS() \ - GREATEST_FN(TinyIntVal); \ - GREATEST_FN(SmallIntVal); \ - GREATEST_FN(IntVal); \ - GREATEST_FN(BigIntVal); \ - GREATEST_FN(LargeIntVal); \ - GREATEST_FN(FloatVal); \ - GREATEST_FN(DoubleVal); - -GREATEST_FNS(); - -#define GREATEST_NONNUMERIC_FN(TYPE_NAME, TYPE, DORIS_TYPE) \ - TYPE MathFunctions::greatest(FunctionContext* ctx, int num_args, const TYPE* args) { \ - if (args[0].is_null) return TYPE::null(); \ - DORIS_TYPE result_val = DORIS_TYPE::from_##TYPE_NAME(args[0]); \ - for (int i = 1; i < num_args; ++i) { \ - if (args[i].is_null) return TYPE::null(); \ - DORIS_TYPE val = DORIS_TYPE::from_##TYPE_NAME(args[i]); \ - if (val > result_val) result_val = val; \ - } \ - TYPE result; \ - result_val.to_##TYPE_NAME(&result); \ - return result; \ - } - -#define GREATEST_NONNUMERIC_FNS() \ - GREATEST_NONNUMERIC_FN(string_val, StringVal, StringRef); \ - GREATEST_NONNUMERIC_FN(datetime_val, DateTimeVal, DateTimeValue); \ - GREATEST_NONNUMERIC_FN(decimal_val, DecimalV2Val, DecimalV2Value); - -GREATEST_NONNUMERIC_FNS(); } // namespace doris diff --git a/be/src/exprs/math_functions.h b/be/src/exprs/math_functions.h index cac13ef6c0..257916f61d 100644 --- a/be/src/exprs/math_functions.h +++ b/be/src/exprs/math_functions.h @@ -28,163 +28,6 @@ namespace doris { class MathFunctions { public: - static void init(); - - static doris_udf::DoubleVal pi(doris_udf::FunctionContext* ctx); - static doris_udf::DoubleVal e(doris_udf::FunctionContext* ctx); - - static doris_udf::DoubleVal abs(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::FloatVal abs(doris_udf::FunctionContext*, const doris_udf::FloatVal&); - static doris_udf::DecimalV2Val abs(doris_udf::FunctionContext*, const doris_udf::DecimalV2Val&); - - // For integer math, we have to promote ABS() to the next highest integer type because - // in two's complement arithmetic, the largest negative value for any bit width is not - // representable as a positive value within the same width. For the largest width, we - // simply overflow. In the unlikely event a workaround is needed, one can simply cast - // to a higher precision decimal type. - static doris_udf::LargeIntVal abs(doris_udf::FunctionContext*, const doris_udf::LargeIntVal&); - static doris_udf::LargeIntVal abs(doris_udf::FunctionContext*, const doris_udf::BigIntVal&); - static doris_udf::BigIntVal abs(doris_udf::FunctionContext*, const doris_udf::IntVal&); - static doris_udf::IntVal abs(doris_udf::FunctionContext*, const doris_udf::SmallIntVal&); - static doris_udf::SmallIntVal abs(doris_udf::FunctionContext*, const doris_udf::TinyIntVal&); - - static doris_udf::TinyIntVal sign(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& v); - - static doris_udf::DoubleVal sin(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::DoubleVal asin(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::DoubleVal cos(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::DoubleVal acos(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::DoubleVal tan(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::DoubleVal atan(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - - static doris_udf::BigIntVal ceil(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::BigIntVal floor(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::BigIntVal round(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& v); - static doris_udf::BigIntVal round_bankers(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& v); - static doris_udf::DoubleVal round_bankers(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& v, - const doris_udf::IntVal& scale); - static doris_udf::DoubleVal round_up_to(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& v, - const doris_udf::IntVal& scale); - static doris_udf::DoubleVal truncate(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& v, - const doris_udf::IntVal& scale); - - static doris_udf::DoubleVal ln(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::DoubleVal log(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& base, - const doris_udf::DoubleVal& v); - static doris_udf::DoubleVal log2(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& v); - static doris_udf::DoubleVal log10(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::DoubleVal exp(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - - static doris_udf::DoubleVal radians(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& v); - static doris_udf::DoubleVal degrees(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& v); - - static doris_udf::DoubleVal sqrt(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::DoubleVal cbrt(doris_udf::FunctionContext*, const doris_udf::DoubleVal&); - static doris_udf::DoubleVal pow(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& base, - const doris_udf::DoubleVal& exp); - - /// Used for both rand() and rand_seed() - static void rand_prepare(doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope); - static doris_udf::DoubleVal rand(doris_udf::FunctionContext*); - static doris_udf::DoubleVal rand_seed(doris_udf::FunctionContext*, - const doris_udf::BigIntVal& seed); - static void rand_close(FunctionContext* ctx, FunctionContext::FunctionStateScope scope); - - static doris_udf::StringVal bin(doris_udf::FunctionContext* ctx, const doris_udf::BigIntVal& v); - static doris_udf::StringVal hex_int(doris_udf::FunctionContext* ctx, - const doris_udf::BigIntVal& v); - static doris_udf::StringVal hex_string(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& s); - static doris_udf::StringVal unhex(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& s); - - static doris_udf::StringVal conv_int(doris_udf::FunctionContext* ctx, - const doris_udf::BigIntVal& num, - const doris_udf::TinyIntVal& src_base, - const doris_udf::TinyIntVal& dest_base); - static doris_udf::StringVal conv_string(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& num_str, - const doris_udf::TinyIntVal& src_base, - const doris_udf::TinyIntVal& dest_base); - - static doris_udf::BigIntVal pmod_bigint(doris_udf::FunctionContext* ctx, - const doris_udf::BigIntVal& a, - const doris_udf::BigIntVal& b); - static doris_udf::DoubleVal pmod_double(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& a, - const doris_udf::DoubleVal& b); - static doris_udf::FloatVal fmod_float(doris_udf::FunctionContext*, const doris_udf::FloatVal&, - const doris_udf::FloatVal&); - static doris_udf::DoubleVal fmod_double(doris_udf::FunctionContext*, - const doris_udf::DoubleVal&, - const doris_udf::DoubleVal&); - - static doris_udf::BigIntVal positive_bigint(doris_udf::FunctionContext* ctx, - const doris_udf::BigIntVal& val); - static doris_udf::DoubleVal positive_double(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& val); - static doris_udf::DecimalV2Val positive_decimal(doris_udf::FunctionContext* ctx, - const doris_udf::DecimalV2Val& val); - static doris_udf::BigIntVal negative_bigint(doris_udf::FunctionContext* ctx, - const doris_udf::BigIntVal& val); - static doris_udf::DoubleVal negative_double(doris_udf::FunctionContext* ctx, - const doris_udf::DoubleVal& val); - static doris_udf::DecimalV2Val negative_decimal(doris_udf::FunctionContext* ctx, - const doris_udf::DecimalV2Val& val); - - static doris_udf::TinyIntVal least(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::TinyIntVal* args); - static doris_udf::TinyIntVal greatest(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::TinyIntVal* args); - static doris_udf::SmallIntVal least(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::SmallIntVal* val); - static doris_udf::SmallIntVal greatest(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::SmallIntVal* val); - static doris_udf::IntVal least(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::IntVal* val); - static doris_udf::IntVal greatest(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::IntVal* val); - static doris_udf::BigIntVal least(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::BigIntVal* val); - static doris_udf::BigIntVal greatest(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::BigIntVal* val); - static doris_udf::LargeIntVal least(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::LargeIntVal* val); - static doris_udf::LargeIntVal greatest(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::LargeIntVal* val); - static doris_udf::FloatVal least(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::FloatVal* val); - static doris_udf::FloatVal greatest(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::FloatVal* val); - static doris_udf::DoubleVal least(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::DoubleVal* val); - static doris_udf::DoubleVal greatest(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::DoubleVal* val); - static doris_udf::StringVal least(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::StringVal* val); - static doris_udf::StringVal greatest(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::StringVal* val); - static doris_udf::DateTimeVal least(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::DateTimeVal* val); - static doris_udf::DateTimeVal greatest(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::DateTimeVal* val); - static doris_udf::DecimalV2Val least(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::DecimalV2Val* val); - static doris_udf::DecimalV2Val greatest(doris_udf::FunctionContext* ctx, int num_args, - const doris_udf::DecimalV2Val* val); - static double my_double_round(double value, int64_t dec, bool dec_unsigned, bool truncate); // Converts src_num in decimal to dest_base, diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp index 0f9e3f5b19..6ca42affb4 100644 --- a/be/src/exprs/string_functions.cpp +++ b/be/src/exprs/string_functions.cpp @@ -32,461 +32,6 @@ // NOTE: be careful not to use string::append. It is not performant. namespace doris { -void StringFunctions::init() {} - -size_t get_char_len(const StringVal& str, std::vector* str_index) { - size_t char_len = 0; - for (size_t i = 0, char_size = 0; i < str.len; i += char_size) { - char_size = UTF8_BYTE_LENGTH[(unsigned char)(str.ptr)[i]]; - str_index->push_back(i); - ++char_len; - } - return char_len; -} - -// This behaves identically to the mysql implementation, namely: -// - 1-indexed positions -// - supported negative positions (count from the end of the string) -// - [optional] len. No len indicates longest substr possible -StringVal StringFunctions::substring(FunctionContext* context, const StringVal& str, - const IntVal& pos, const IntVal& len) { - if (str.is_null || pos.is_null || len.is_null) { - return StringVal::null(); - } - if (len.val <= 0 || str.len == 0 || pos.val == 0 || pos.val > str.len) { - return StringVal(); - } - - // create index indicate every char start byte - // e.g. "hello word 你好" => [0,1,2,3,4,5,6,7,8,9,10,11,14] 你 and 好 are 3 bytes - // why use a vector as index? It is unnecessary if there is no negative pos val, - // but if has pos is negative it is not easy to determine where to start, so need a - // index save every character's length - size_t byte_pos = 0; - std::vector index; - for (size_t i = 0, char_size = 0; i < str.len; i += char_size) { - char_size = UTF8_BYTE_LENGTH[(unsigned char)(str.ptr)[i]]; - index.push_back(i); - if (pos.val > 0 && index.size() > pos.val + len.val) { - break; - } - } - - int fixed_pos = pos.val; - if (fixed_pos < 0) { - fixed_pos = index.size() + fixed_pos + 1; - } - if (fixed_pos > index.size()) { - return StringVal::null(); - } - byte_pos = index[fixed_pos - 1]; - int fixed_len = str.len - byte_pos; - if (fixed_pos + len.val <= index.size()) { - fixed_len = index[fixed_pos + len.val - 1] - byte_pos; - } - if (byte_pos <= str.len && fixed_len > 0) { - return StringVal(str.ptr + byte_pos, fixed_len); - } else { - return StringVal(); - } -} - -StringVal StringFunctions::substring(FunctionContext* context, const StringVal& str, - const IntVal& pos) { - // StringVal.len is an int => INT32_MAX - return substring(context, str, pos, IntVal(INT32_MAX)); -} - -// Implementation of Left. The signature is -// string left(string input, int len) -// This behaves identically to the mysql implementation. -StringVal StringFunctions::left(FunctionContext* context, const StringVal& str, const IntVal& len) { - if (len.val >= str.len) return str; - return substring(context, str, 1, len); -} - -// Implementation of Right. The signature is -// string right(string input, int len) -// This behaves identically to the mysql implementation. -StringVal StringFunctions::right(FunctionContext* context, const StringVal& str, - const IntVal& len) { - // Don't index past the beginning of str, otherwise we'll get an empty string back - int32_t pos = std::max(-len.val, static_cast(-str.len)); - return substring(context, str, IntVal(pos), len); -} - -BooleanVal StringFunctions::starts_with(FunctionContext* context, const StringVal& str, - const StringVal& prefix) { - if (str.is_null || prefix.is_null) { - return BooleanVal::null(); - } - re2::StringPiece str_sp(reinterpret_cast(str.ptr), str.len); - re2::StringPiece prefix_sp(reinterpret_cast(prefix.ptr), prefix.len); - return BooleanVal(str_sp.starts_with(prefix_sp)); -} - -BooleanVal StringFunctions::ends_with(FunctionContext* context, const StringVal& str, - const StringVal& suffix) { - if (str.is_null || suffix.is_null) { - return BooleanVal::null(); - } - re2::StringPiece str_sp(reinterpret_cast(str.ptr), str.len); - re2::StringPiece suffix_sp(reinterpret_cast(suffix.ptr), suffix.len); - return BooleanVal(str_sp.ends_with(suffix_sp)); -} - -BooleanVal StringFunctions::null_or_empty(FunctionContext* context, const StringVal& str) { - if (str.is_null || str.len == 0) { - return 1; - } else { - return 0; - } -} - -BooleanVal StringFunctions::not_null_or_empty(FunctionContext* context, const StringVal& str) { - if (str.is_null || str.len == 0) { - return 0; - } else { - return 1; - } -} - -StringVal StringFunctions::space(FunctionContext* context, const IntVal& len) { - if (len.is_null) { - return StringVal::null(); - } - if (len.val <= 0) { - return StringVal(); - } - int32_t space_size = std::min(len.val, 65535); - // TODO pengyubing - // StringVal result = StringVal::create_temp_string_val(context, space_size); - StringVal result(context, space_size); - memset(result.ptr, ' ', space_size); - return result; -} - -StringVal StringFunctions::repeat(FunctionContext* context, const StringVal& str, const IntVal& n) { - if (str.is_null || n.is_null) { - return StringVal::null(); - } - if (str.len == 0 || n.val <= 0) { - return StringVal(); - } - - // TODO pengyubing - // StringVal result = StringVal::create_temp_string_val(context, str.len * n.val); - StringVal result(context, str.len * n.val); - if (UNLIKELY(result.is_null)) { - return result; - } - uint8_t* ptr = result.ptr; - for (int64_t i = 0; i < n.val; ++i) { - memcpy(ptr, str.ptr, str.len); - ptr += str.len; - } - return result; -} - -StringVal StringFunctions::lpad(FunctionContext* context, const StringVal& str, const IntVal& len, - const StringVal& pad) { - if (str.is_null || len.is_null || pad.is_null || len.val < 0) { - return StringVal::null(); - } - - std::vector str_index; - size_t str_char_size = get_char_len(str, &str_index); - std::vector pad_index; - size_t pad_char_size = get_char_len(pad, &pad_index); - - // Corner cases: Shrink the original string, or leave it alone. - // TODO: Hive seems to go into an infinite loop if pad.len == 0, - // so we should pay attention to Hive's future solution to be compatible. - if (len.val <= str_char_size || pad.len == 0) { - if (len.val > str_index.size()) { - return StringVal::null(); - } - if (len.val == str_index.size()) { - return StringVal(str.ptr, str.len); - } - return StringVal(str.ptr, str_index[len.val]); - } - - // TODO pengyubing - // StringVal result = StringVal::create_temp_string_val(context, len.val); - int32_t pad_byte_len = 0; - int32_t pad_times = (len.val - str_char_size) / pad_char_size; - int32_t pad_remainder = (len.val - str_char_size) % pad_char_size; - pad_byte_len = pad_times * pad.len; - pad_byte_len += pad_index[pad_remainder]; - int32_t byte_len = str.len + pad_byte_len; - StringVal result(context, byte_len); - if (result.is_null) { - return result; - } - int pad_idx = 0; - int result_index = 0; - uint8_t* ptr = result.ptr; - - // Prepend chars of pad. - while (result_index < pad_byte_len) { - ptr[result_index++] = pad.ptr[pad_idx++]; - pad_idx = pad_idx % pad.len; - } - - // Append given string. - memcpy(ptr + result_index, str.ptr, str.len); - return result; -} - -StringVal StringFunctions::rpad(FunctionContext* context, const StringVal& str, const IntVal& len, - const StringVal& pad) { - if (str.is_null || len.is_null || pad.is_null || len.val < 0) { - return StringVal::null(); - } - - std::vector str_index; - size_t str_char_size = get_char_len(str, &str_index); - std::vector pad_index; - size_t pad_char_size = get_char_len(pad, &pad_index); - - // Corner cases: Shrink the original string, or leave it alone. - // TODO: Hive seems to go into an infinite loop if pad->len == 0, - // so we should pay attention to Hive's future solution to be compatible. - if (len.val <= str_char_size || pad.len == 0) { - if (len.val > str_index.size()) { - return StringVal::null(); - } - if (len.val == str_index.size()) { - return StringVal(str.ptr, str.len); - } - return StringVal(str.ptr, str_index[len.val]); - } - - // TODO pengyubing - // StringVal result = StringVal::create_temp_string_val(context, len.val); - int32_t pad_byte_len = 0; - int32_t pad_times = (len.val - str_char_size) / pad_char_size; - int32_t pad_remainder = (len.val - str_char_size) % pad_char_size; - pad_byte_len = pad_times * pad.len; - pad_byte_len += pad_index[pad_remainder]; - int32_t byte_len = str.len + pad_byte_len; - StringVal result(context, byte_len); - if (UNLIKELY(result.is_null)) { - return result; - } - memcpy(result.ptr, str.ptr, str.len); - - // Append chars of pad until desired length - uint8_t* ptr = result.ptr; - int pad_idx = 0; - int result_len = str.len; - while (result_len < byte_len) { - ptr[result_len++] = pad.ptr[pad_idx++]; - pad_idx = pad_idx % pad.len; - } - return result; -} - -StringVal StringFunctions::append_trailing_char_if_absent( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const doris_udf::StringVal& trailing_char) { - if (str.is_null || trailing_char.is_null || trailing_char.len != 1) { - return StringVal::null(); - } - if (str.len == 0) { - return trailing_char; - } - if (str.ptr[str.len - 1] == trailing_char.ptr[0]) { - return str; - } - - StringVal result(context, str.len + 1); - memcpy(result.ptr, str.ptr, str.len); - result.ptr[str.len] = trailing_char.ptr[0]; - return result; -} - -// Implementation of LENGTH -// int length(string input) -// Returns the length in bytes of input. If input == nullptr, returns -// nullptr per MySQL -IntVal StringFunctions::length(FunctionContext* context, const StringVal& str) { - if (str.is_null) { - return IntVal::null(); - } - return IntVal(str.len); -} - -// Implementation of CHAR_LENGTH -// int char_utf8_length(string input) -// Returns the length of characters of input. If input == nullptr, returns -// nullptr per MySQL -IntVal StringFunctions::char_utf8_length(FunctionContext* context, const StringVal& str) { - if (str.is_null) { - return IntVal::null(); - } - size_t char_len = 0; - for (size_t i = 0, char_size = 0; i < str.len; i += char_size) { - char_size = UTF8_BYTE_LENGTH[(unsigned char)(str.ptr)[i]]; - ++char_len; - } - return IntVal(char_len); -} - -StringVal StringFunctions::lower(FunctionContext* context, const StringVal& str) { - if (str.is_null) { - return StringVal::null(); - } - StringVal result(context, str.len); - if (UNLIKELY(result.is_null)) { - return result; - } - simd::VStringFunctions::to_lower(str.ptr, str.len, result.ptr); - return result; -} - -StringVal StringFunctions::upper(FunctionContext* context, const StringVal& str) { - if (str.is_null) { - return StringVal::null(); - } - StringVal result(context, str.len); - if (UNLIKELY(result.is_null)) { - return result; - } - simd::VStringFunctions::to_upper(str.ptr, str.len, result.ptr); - return result; -} - -StringVal StringFunctions::initcap(FunctionContext* context, const StringVal& str) { - if (str.is_null) { - return StringVal::null(); - } - StringVal result(context, str.len); - - simd::VStringFunctions::to_lower(str.ptr, str.len, result.ptr); - - bool need_capitalize = true; - for (int64_t i = 0; i < str.len; ++i) { - if (!::isalnum(result.ptr[i])) { - need_capitalize = true; - } else if (need_capitalize) { - result.ptr[i] = ::toupper(result.ptr[i]); - need_capitalize = false; - } - } - - return result; -} - -StringVal StringFunctions::reverse(FunctionContext* context, const StringVal& str) { - if (str.is_null) { - return StringVal::null(); - } - - StringVal result(context, str.len); - if (UNLIKELY(result.is_null)) { - return result; - } - - simd::VStringFunctions::reverse(str, result); - return result; -} - -StringVal StringFunctions::trim(FunctionContext* context, const StringVal& str) { - return simd::VStringFunctions::trim(str); -} - -StringVal StringFunctions::ltrim(FunctionContext* context, const StringVal& str) { - return simd::VStringFunctions::ltrim(str); -} - -StringVal StringFunctions::rtrim(FunctionContext* context, const StringVal& str) { - return simd::VStringFunctions::rtrim(str); -} - -IntVal StringFunctions::ascii(FunctionContext* context, const StringVal& str) { - if (str.is_null) { - return IntVal::null(); - } - // Hive returns 0 when given an empty string. - return IntVal((str.len == 0) ? 0 : static_cast(str.ptr[0])); -} - -IntVal StringFunctions::instr(FunctionContext* context, const StringVal& str, - const StringVal& substr) { - if (str.is_null || substr.is_null) { - return IntVal::null(); - } - if (substr.len == 0) { - return IntVal(1); - } - StringRef str_sv = StringRef(str); - StringRef substr_sv = StringRef(substr); - StringSearch search(&substr_sv); - // Hive returns positions starting from 1. - int loc = search.search(&str_sv); - if (loc > 0) { - size_t char_len = 0; - for (size_t i = 0, char_size = 0; i < loc; i += char_size) { - char_size = UTF8_BYTE_LENGTH[(unsigned char)(str.ptr)[i]]; - ++char_len; - } - loc = char_len; - } - - return IntVal(loc + 1); -} - -IntVal StringFunctions::locate(FunctionContext* context, const StringVal& substr, - const StringVal& str) { - return instr(context, str, substr); -} - -IntVal StringFunctions::locate_pos(FunctionContext* context, const StringVal& substr, - const StringVal& str, const IntVal& start_pos) { - if (str.is_null || substr.is_null || start_pos.is_null) { - return IntVal::null(); - } - if (substr.len == 0) { - if (start_pos.val <= 0) { - return IntVal(0); - } else if (start_pos.val == 1) { - return IntVal(1); - } else if (start_pos.val > str.len) { - return IntVal(0); - } else { - return IntVal(start_pos.val); - } - } - // Hive returns 0 for *start_pos <= 0, - // but throws an exception for *start_pos > str->len. - // Since returning 0 seems to be Hive's error condition, return 0. - std::vector index; - size_t char_len = get_char_len(str, &index); - if (start_pos.val <= 0 || start_pos.val > str.len || start_pos.val > char_len) { - return IntVal(0); - } - StringRef substr_sv = StringRef(substr); - StringSearch search(&substr_sv); - // Input start_pos.val starts from 1. - StringRef adjusted_str(reinterpret_cast(str.ptr) + index[start_pos.val - 1], - str.len - index[start_pos.val - 1]); - int32_t match_pos = search.search(&adjusted_str); - if (match_pos >= 0) { - // Hive returns the position in the original string starting from 1. - size_t char_len = 0; - for (size_t i = 0, char_size = 0; i < match_pos; i += char_size) { - char_size = UTF8_BYTE_LENGTH[(unsigned char)(adjusted_str.data)[i]]; - ++char_len; - } - match_pos = char_len; - return IntVal(start_pos.val + match_pos); - } else { - return IntVal(0); - } -} - // This function sets options in the RE2 library before pattern matching. bool StringFunctions::set_re2_options(const StringVal& match_parameter, std::string* error_str, re2::RE2::Options* opts) { @@ -544,513 +89,4 @@ re2::RE2* StringFunctions::compile_regex(const StringVal& pattern, std::string* return re; } -void StringFunctions::regexp_prepare(FunctionContext* context, - FunctionContext::FunctionStateScope scope) { - if (scope != FunctionContext::FRAGMENT_LOCAL) { - return; - } - - if (!context->is_arg_constant(1)) { - return; - } - StringVal* pattern = reinterpret_cast(context->get_constant_arg(1)); - if (pattern->is_null) { - return; - } - std::string error_str; - re2::RE2* re = compile_regex(*pattern, &error_str, StringVal::null()); - if (re == nullptr) { - context->set_error(error_str.c_str()); - return; - } - context->set_function_state(scope, re); -} - -void StringFunctions::regexp_close(FunctionContext* context, - FunctionContext::FunctionStateScope scope) { - if (scope != FunctionContext::FRAGMENT_LOCAL) { - return; - } - re2::RE2* re = reinterpret_cast(context->get_function_state(scope)); - delete re; -} - -StringVal StringFunctions::regexp_extract(FunctionContext* context, const StringVal& str, - const StringVal& pattern, const BigIntVal& index) { - if (str.is_null || pattern.is_null || index.is_null) { - return StringVal::null(); - } - if (index.val < 0) { - return StringVal(); - } - - re2::RE2* re = reinterpret_cast( - context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); - std::unique_ptr scoped_re; // destroys re if we have to locally compile it - if (re == nullptr) { - DCHECK(!context->is_arg_constant(1)); - std::string error_str; - re = compile_regex(pattern, &error_str, StringVal::null()); - if (re == nullptr) { - context->add_warning(error_str.c_str()); - return StringVal::null(); - } - scoped_re.reset(re); - } - - re2::StringPiece str_sp(reinterpret_cast(str.ptr), str.len); - int max_matches = 1 + re->NumberOfCapturingGroups(); - if (index.val >= max_matches) { - return StringVal(); - } - // Use a vector because clang complains about non-POD varlen arrays - // TODO: fix this - std::vector matches(max_matches); - bool success = re->Match(str_sp, 0, str.len, re2::RE2::UNANCHORED, &matches[0], max_matches); - if (!success) { - return StringVal(); - } - // matches[0] is the whole string, matches[1] the first group, etc. - const re2::StringPiece& match = matches[index.val]; - return AnyValUtil::from_buffer_temp(context, match.data(), match.size()); -} - -StringVal StringFunctions::regexp_replace(FunctionContext* context, const StringVal& str, - const StringVal& pattern, const StringVal& replace) { - if (str.is_null || pattern.is_null || replace.is_null) { - return StringVal::null(); - } - - re2::RE2* re = reinterpret_cast( - context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); - std::unique_ptr scoped_re; // destroys re if state->re is nullptr - if (re == nullptr) { - DCHECK(!context->is_arg_constant(1)); - std::string error_str; - re = compile_regex(pattern, &error_str, StringVal::null()); - if (re == nullptr) { - context->add_warning(error_str.c_str()); - return StringVal::null(); - } - scoped_re.reset(re); - } - - re2::StringPiece replace_str = - re2::StringPiece(reinterpret_cast(replace.ptr), replace.len); - std::string result_str = AnyValUtil::to_string(str); - re2::RE2::GlobalReplace(&result_str, *re, replace_str); - return AnyValUtil::from_string_temp(context, result_str); -} - -StringVal StringFunctions::concat(FunctionContext* context, int num_children, - const StringVal* strs) { - DCHECK_GE(num_children, 1); - - // Pass through if there's only one argument - if (num_children == 1) { - return strs[0]; - } - - // Loop once to compute the final size and reserve space. - int32_t total_size = 0; - for (int32_t i = 0; i < num_children; ++i) { - if (strs[i].is_null) { - return StringVal::null(); - } - total_size += strs[i].len; - } - - StringVal result(context, total_size); - uint8_t* ptr = result.ptr; - - // Loop again to append the data. - for (int32_t i = 0; i < num_children; ++i) { - memcpy(ptr, strs[i].ptr, strs[i].len); - ptr += strs[i].len; - } - return result; -} - -StringVal StringFunctions::concat_ws(FunctionContext* context, const StringVal& sep, - int num_children, const StringVal* strs) { - DCHECK_GE(num_children, 1); - if (sep.is_null) { - return StringVal::null(); - } - - // Loop once to compute the final size and reserve space. - int32_t total_size = 0; - bool not_first = false; - for (int32_t i = 0; i < num_children; ++i) { - if (strs[i].is_null) { - continue; - } - if (not_first) { - total_size += sep.len; - } - total_size += strs[i].len; - not_first = true; - } - - StringVal result(context, total_size); - uint8_t* ptr = result.ptr; - not_first = false; - // Loop again to append the data. - for (int32_t i = 0; i < num_children; ++i) { - if (strs[i].is_null) { - continue; - } - if (not_first) { - memcpy(ptr, sep.ptr, sep.len); - ptr += sep.len; - } - memcpy(ptr, strs[i].ptr, strs[i].len); - ptr += strs[i].len; - not_first = true; - } - return result; -} - -StringVal StringFunctions::elt(FunctionContext* context, const IntVal& pos, int num_children, - const StringVal* strs) { - if (pos.is_null || pos.val < 1 || num_children == 0 || pos.val > num_children) { - return StringVal::null(); - } - - return strs[pos.val - 1]; -} - -IntVal StringFunctions::find_in_set(FunctionContext* context, const StringVal& str, - const StringVal& str_set) { - if (str.is_null || str_set.is_null) { - return IntVal::null(); - } - // Check str for commas. - for (int i = 0; i < str.len; ++i) { - if (str.ptr[i] == ',') { - return IntVal(0); - } - } - // The result index starts from 1 since 0 is an error condition. - int32_t token_index = 1; - int32_t start = 0; - int32_t end; - StringRef str_sv = StringRef(str); - do { - end = start; - // Position end. - while (end < str_set.len && str_set.ptr[end] != ',') { - ++end; - } - StringRef token(reinterpret_cast(str_set.ptr) + start, end - start); - if (str_sv.eq(token)) { - return IntVal(token_index); - } - - // Re-position start and end past ',' - start = end + 1; - ++token_index; - } while (start < str_set.len); - return IntVal(0); -} - -void StringFunctions::parse_url_prepare(FunctionContext* ctx, - FunctionContext::FunctionStateScope scope) { - if (scope != FunctionContext::FRAGMENT_LOCAL) { - return; - } - if (!ctx->is_arg_constant(1)) { - return; - } - StringVal* part = reinterpret_cast(ctx->get_constant_arg(1)); - if (part->is_null) { - return; - } - UrlParser::UrlPart* url_part = new UrlParser::UrlPart; - *url_part = UrlParser::get_url_part(StringRef(*part)); - if (*url_part == UrlParser::INVALID) { - std::stringstream ss; - ss << "Invalid URL part: " << AnyValUtil::to_string(*part) << std::endl - << "(Valid URL parts are 'PROTOCOL', 'HOST', 'PATH', 'REF', 'AUTHORITY', 'FILE', " - << "'USERINFO', 'PORT' and 'QUERY')"; - ctx->set_error(ss.str().c_str()); - return; - } - ctx->set_function_state(scope, url_part); -} - -StringVal StringFunctions::parse_url(FunctionContext* ctx, const StringVal& url, - const StringVal& part) { - if (url.is_null || part.is_null) { - return StringVal::null(); - } - std::string part_str = std::string(reinterpret_cast(part.ptr), part.len); - transform(part_str.begin(), part_str.end(), part_str.begin(), ::toupper); - StringVal newPart = AnyValUtil::from_string_temp(ctx, part_str); - void* state = ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL); - UrlParser::UrlPart url_part; - if (state != nullptr) { - url_part = *reinterpret_cast(state); - } else { - DCHECK(!ctx->is_arg_constant(1)); - url_part = UrlParser::get_url_part(StringRef(newPart)); - } - - StringRef result; - if (!UrlParser::parse_url(StringRef(url), url_part, &result)) { - // url is malformed, or url_part is invalid. - if (url_part == UrlParser::INVALID) { - std::stringstream ss; - ss << "Invalid URL part: " << AnyValUtil::to_string(newPart); - ctx->add_warning(ss.str().c_str()); - } else { - std::stringstream ss; - ss << "Could not parse URL: " << AnyValUtil::to_string(url); - ctx->add_warning(ss.str().c_str()); - } - return StringVal::null(); - } - StringVal result_sv; - result.to_string_val(&result_sv); - return result_sv; -} - -void StringFunctions::parse_url_close(FunctionContext* ctx, - FunctionContext::FunctionStateScope scope) { - if (scope != FunctionContext::FRAGMENT_LOCAL) { - return; - } - UrlParser::UrlPart* url_part = - reinterpret_cast(ctx->get_function_state(scope)); - delete url_part; -} - -StringVal StringFunctions::parse_url_key(FunctionContext* ctx, const StringVal& url, - const StringVal& part, const StringVal& key) { - if (url.is_null || part.is_null || key.is_null) { - return StringVal::null(); - } - void* state = ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL); - UrlParser::UrlPart url_part; - if (state != nullptr) { - url_part = *reinterpret_cast(state); - } else { - DCHECK(!ctx->is_arg_constant(1)); - url_part = UrlParser::get_url_part(StringRef(part)); - } - - StringRef result; - if (!UrlParser::parse_url_key(StringRef(url), url_part, StringRef(key), &result)) { - // url is malformed, or url_part is invalid. - if (url_part == UrlParser::INVALID) { - std::stringstream ss; - ss << "Invalid URL part: " << AnyValUtil::to_string(part); - ctx->add_warning(ss.str().c_str()); - } else { - std::stringstream ss; - ss << "Could not parse URL: " << AnyValUtil::to_string(url); - ctx->add_warning(ss.str().c_str()); - } - return StringVal::null(); - } - StringVal result_sv; - result.to_string_val(&result_sv); - return result_sv; -} - -StringVal StringFunctions::money_format(FunctionContext* context, const DoubleVal& v) { - if (v.is_null) { - return StringVal::null(); - } - double v_cent = MathFunctions::my_double_round(v.val, 2, false, false); - return do_money_format(context, fmt::format("{:.2f}", v_cent)); -} - -StringVal StringFunctions::money_format(FunctionContext* context, const DecimalV2Val& v) { - if (v.is_null) { - return StringVal::null(); - } - - DecimalV2Value rounded(0); - DecimalV2Value::from_decimal_val(v).round(&rounded, 2, HALF_UP); - return do_money_format(context, rounded.int_value(), - abs(rounded.frac_value() / 10000000)); -} - -StringVal StringFunctions::money_format(FunctionContext* context, const BigIntVal& v) { - if (v.is_null) { - return StringVal::null(); - } - return do_money_format(context, v.val); -} - -StringVal StringFunctions::money_format(FunctionContext* context, const LargeIntVal& v) { - if (v.is_null) { - return StringVal::null(); - } - return do_money_format<__int128_t, 52>(context, v.val); -} - -static int index_of(const uint8_t* source, int source_offset, int source_count, - const uint8_t* target, int target_offset, int target_count, int from_index) { - if (from_index >= source_count) { - return (target_count == 0 ? source_count : -1); - } - if (from_index < 0) { - from_index = 0; - } - if (target_count == 0) { - return from_index; - } - const uint8_t first = target[target_offset]; - int max = source_offset + (source_count - target_count); - for (int i = source_offset + from_index; i <= max; i++) { - while (i <= max && source[i] != first) i++; // Look for first character - if (i <= max) { // Found first character, now look at the rest of v2 - int j = i + 1; - int end = j + target_count - 1; - for (int k = target_offset + 1; j < end && source[j] == target[k]; j++, k++) - ; - if (j == end) { - return i - source_offset; // Found whole string. - } - } - } - return -1; -} - -static int last_index_of(const uint8_t* source, int source_len, const uint8_t* target, - int target_len, int to_index) { - if (to_index < 0) { - return -1; - } - if (to_index >= source_len) { - to_index = source_len - 1; - } - if (target_len == 0) { - return to_index; - } - const uint8_t last = target[target_len - 1]; - int min = target_len; - for (int i = to_index; i >= min; i--) { - while (i >= min && source[i] != last) { - i--; // Look for last character - } - if (i >= min) { // Found first character, now look at the rest of v2 - int j = i - 1; - int end = j - target_len + 1; - for (int k = target_len - 2; j > end && source[j] == target[k];) { - j--; - k--; - } - if (j == end) { - return i - target_len + 1; - } - } - } - return -1; -} - -StringVal StringFunctions::split_part(FunctionContext* context, const StringVal& content, - const StringVal& delimiter, const IntVal& field) { - if (content.is_null || delimiter.is_null || field.is_null || field.val == 0) { - return StringVal::null(); - } - - if (field.val > 0) { - int from = 0; - std::vector find(field.val, -1); //store substring position - for (int i = 1; i <= field.val; i++) { // find - int last_index = i - 1; - find[last_index] = - index_of(content.ptr, 0, content.len, delimiter.ptr, 0, delimiter.len, from); - from = find[last_index] + delimiter.len; - if (find[last_index] == -1) { - break; - } - } - if ((field.val > 1 && find[field.val - 2] == -1) || - (field.val == 1 && find[field.val - 1] == -1)) { - // field not find return null - return StringVal::null(); - } - int start_pos; - if (field.val == 1) { // find need split first part - start_pos = 0; - } else { - start_pos = find[field.val - 2] + delimiter.len; - } - int len = (find[field.val - 1] == -1 ? content.len : find[field.val - 1]) - start_pos; - return StringVal(content.ptr + start_pos, len); - } else { - int to = content.len; - int abs_field = -field.val; - std::vector find(abs_field, -1); //store substring position - for (int i = 1; i <= abs_field; i++) { // find - int last_index = i - 1; - find[last_index] = - last_index_of(content.ptr, content.len, delimiter.ptr, delimiter.len, to); - to = find[last_index] - delimiter.len; - if (find[last_index] == -1) { - break; - } - } - if ((abs_field > 1 && find[abs_field - 2] == -1) || - (abs_field == 1 && find[abs_field - 1] == -1)) { - // field not find return null - return StringVal::null(); - } - int end_pos; - if (abs_field == 1) { // find need split first part - end_pos = content.len - 1; - } else { - end_pos = find[abs_field - 2] - 1; - } - int len = - end_pos - (find[abs_field - 1] == -1 ? 0 : find[abs_field - 1] + delimiter.len) + 1; - - return StringVal(content.ptr + end_pos - len + 1, len); - } -} - -StringVal StringFunctions::replace(FunctionContext* context, const StringVal& origStr, - const StringVal& oldStr, const StringVal& newStr) { - if (origStr.is_null || oldStr.is_null || newStr.is_null) { - return StringVal::null(); - } - // Empty string is a substring of all strings. - // If old str is an empty string, the std::string.find(oldStr) is always return 0. - // With an empty old str, there is no need to do replace. - if (oldStr.len == 0) { - return origStr; - } - std::string orig_str = std::string(reinterpret_cast(origStr.ptr), origStr.len); - std::string old_str = std::string(reinterpret_cast(oldStr.ptr), oldStr.len); - std::string new_str = std::string(reinterpret_cast(newStr.ptr), newStr.len); - std::string::size_type pos = 0; - std::string::size_type oldLen = old_str.size(); - std::string::size_type newLen = new_str.size(); - while ((pos = orig_str.find(old_str, pos)) != std::string::npos) { - orig_str.replace(pos, oldLen, new_str); - pos += newLen; - } - return AnyValUtil::from_string_temp(context, orig_str); -} -// Implementation of BIT_LENGTH -// int bit_length(string input) -// Returns the length in bits of input. If input == nullptr, returns -// nullptr per MySQL -IntVal StringFunctions::bit_length(FunctionContext* context, const StringVal& str) { - if (str.is_null) { - return IntVal::null(); - } - return IntVal(str.len * 8); -} - -StringVal StringFunctions::uuid(FunctionContext* ctx) { - boost::uuids::random_generator generator; - std::string uuid = boost::uuids::to_string(generator()); - - return AnyValUtil::from_string_temp(ctx, uuid); -} } // namespace doris diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h index 01fe8646da..bcdff6f80a 100644 --- a/be/src/exprs/string_functions.h +++ b/be/src/exprs/string_functions.h @@ -32,165 +32,11 @@ namespace doris { -class OpcodeRegistry; - class StringFunctions { public: - static void init(); - - static doris_udf::StringVal substring(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, - const doris_udf::IntVal& pos, - const doris_udf::IntVal& len); - static doris_udf::StringVal substring(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, - const doris_udf::IntVal& pos); - static doris_udf::StringVal left(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, const doris_udf::IntVal& len); - static doris_udf::StringVal right(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, - const doris_udf::IntVal& len); - static doris_udf::BooleanVal starts_with(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, - const doris_udf::StringVal& prefix); - static doris_udf::BooleanVal ends_with(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, - const doris_udf::StringVal& suffix); - static doris_udf::BooleanVal null_or_empty(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::BooleanVal not_null_or_empty(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::StringVal space(doris_udf::FunctionContext* context, - const doris_udf::IntVal& len); - static doris_udf::StringVal repeat(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, const doris_udf::IntVal& n); - static doris_udf::StringVal lpad(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, const doris_udf::IntVal& len, - const doris_udf::StringVal& pad); - static doris_udf::StringVal rpad(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, const doris_udf::IntVal& len, - const doris_udf::StringVal& pad); - static doris_udf::StringVal append_trailing_char_if_absent( - doris_udf::FunctionContext* context, const doris_udf::StringVal& str, - const doris_udf::StringVal& trailing_char); - static doris_udf::IntVal length(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::IntVal char_utf8_length(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::StringVal lower(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::StringVal upper(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::StringVal initcap(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::StringVal reverse(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::StringVal trim(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::StringVal ltrim(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::StringVal rtrim(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::IntVal ascii(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::IntVal instr(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, const doris_udf::StringVal&); - static doris_udf::IntVal locate(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, const doris_udf::StringVal&); - static doris_udf::IntVal locate_pos(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, - const doris_udf::StringVal&, const doris_udf::IntVal&); - static bool set_re2_options(const doris_udf::StringVal& match_parameter, std::string* error_str, re2::RE2::Options* opts); - static void regexp_prepare(doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope); - static StringVal regexp_extract(doris_udf::FunctionContext*, const doris_udf::StringVal& str, - const doris_udf::StringVal& pattern, - const doris_udf::BigIntVal& index); - static StringVal regexp_replace(doris_udf::FunctionContext*, const doris_udf::StringVal& str, - const doris_udf::StringVal& pattern, - const doris_udf::StringVal& replace); - static void regexp_close(doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope); - static StringVal concat(doris_udf::FunctionContext*, int num_children, const StringVal* strs); - static StringVal concat_ws(doris_udf::FunctionContext*, const doris_udf::StringVal& sep, - int num_children, const doris_udf::StringVal* strs); - static StringVal elt(doris_udf::FunctionContext*, const doris_udf::IntVal& pos, - int num_children, const StringVal* strs); - static IntVal find_in_set(doris_udf::FunctionContext*, const doris_udf::StringVal& str, - const doris_udf::StringVal& str_set); - - static void parse_url_prepare(doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope); - static StringVal parse_url(doris_udf::FunctionContext*, const doris_udf::StringVal& url, - const doris_udf::StringVal& part); - static StringVal parse_url_key(doris_udf::FunctionContext*, const doris_udf::StringVal& url, - const doris_udf::StringVal& key, - const doris_udf::StringVal& part); - static void parse_url_close(doris_udf::FunctionContext*, - doris_udf::FunctionContext::FunctionStateScope); - - static doris_udf::StringVal money_format(doris_udf::FunctionContext* context, - const doris_udf::DoubleVal& v); - - static doris_udf::StringVal money_format(doris_udf::FunctionContext* context, - const doris_udf::DecimalV2Val& v); - - static doris_udf::StringVal money_format(doris_udf::FunctionContext* context, - const doris_udf::BigIntVal& v); - - static doris_udf::StringVal money_format(doris_udf::FunctionContext* context, - const doris_udf::LargeIntVal& v); - - template - static StringVal do_money_format(FunctionContext* context, const T int_value, - const int32_t frac_value = 0) { - char local[N]; - char* p = SimpleItoaWithCommas(int_value, local, sizeof(local)); - int32_t string_val_len = local + sizeof(local) - p + 3; - StringVal result = StringVal::create_temp_string_val(context, string_val_len); - memcpy(result.ptr, p, string_val_len - 3); - *(result.ptr + string_val_len - 3) = '.'; - *(result.ptr + string_val_len - 2) = '0' + (frac_value / 10); - *(result.ptr + string_val_len - 1) = '0' + (frac_value % 10); - return result; - }; - - // Note string value must be valid decimal string which contains two digits after the decimal point - static StringVal do_money_format(FunctionContext* context, const string& value) { - bool is_positive = (value[0] != '-'); - int32_t result_len = value.size() + (value.size() - (is_positive ? 4 : 5)) / 3; - StringVal result = StringVal::create_temp_string_val(context, result_len); - if (!is_positive) { - *result.ptr = '-'; - } - for (int i = value.size() - 4, j = result_len - 4; i >= 0; i = i - 3, j = j - 4) { - *(result.ptr + j) = *(value.data() + i); - if (i - 1 < 0) break; - *(result.ptr + j - 1) = *(value.data() + i - 1); - if (i - 2 < 0) break; - *(result.ptr + j - 2) = *(value.data() + i - 2); - if (j - 3 > 1 || (j - 3 == 1 && is_positive)) { - *(result.ptr + j - 3) = ','; - } - } - memcpy(result.ptr + result_len - 3, value.data() + value.size() - 3, 3); - return result; - }; - - static StringVal split_part(FunctionContext* context, const StringVal& content, - const StringVal& delimiter, const IntVal& field); - - static StringVal replace(FunctionContext* context, const StringVal& origStr, - const StringVal& oldStr, const StringVal& newStr); - - static doris_udf::IntVal bit_length(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - - static doris_udf::StringVal uuid(doris_udf::FunctionContext*); - // The caller owns the returned regex. Returns nullptr if the pattern could not be compiled. static re2::RE2* compile_regex(const StringVal& pattern, std::string* error_str, const StringVal& match_parameter); diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index 929dc367b3..b83d233a0a 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -41,6 +41,7 @@ add_library(Olap STATIC like_column_predicate.cpp key_coder.cpp lru_cache.cpp + match_predicate.cpp memtable.cpp memtable_flush_executor.cpp merger.cpp diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h index 34f9be8093..f78e2a577f 100644 --- a/be/src/olap/delta_writer.h +++ b/be/src/olap/delta_writer.h @@ -29,7 +29,6 @@ class MemTable; class MemTracker; class Schema; class StorageEngine; -class Tuple; class TupleDescriptor; class SlotDescriptor; diff --git a/be/src/exprs/match_predicate.cpp b/be/src/olap/match_predicate.cpp similarity index 99% rename from be/src/exprs/match_predicate.cpp rename to be/src/olap/match_predicate.cpp index 2eaaeb929c..e78f52e2f8 100644 --- a/be/src/exprs/match_predicate.cpp +++ b/be/src/olap/match_predicate.cpp @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "exprs/match_predicate.h" +#include "olap/match_predicate.h" #include diff --git a/be/src/exprs/match_predicate.h b/be/src/olap/match_predicate.h similarity index 98% rename from be/src/exprs/match_predicate.h rename to be/src/olap/match_predicate.h index 8afe57481c..ff41fb00eb 100644 --- a/be/src/exprs/match_predicate.h +++ b/be/src/olap/match_predicate.h @@ -30,9 +30,6 @@ namespace doris { enum class MatchType; class MatchPredicate : public ColumnPredicate { -public: - static void init() {} - public: MatchPredicate(uint32_t column_id, const std::string& value, MatchType match_type); diff --git a/be/src/olap/memtable.h b/be/src/olap/memtable.h index 5e6ec00212..80bcb26c0c 100644 --- a/be/src/olap/memtable.h +++ b/be/src/olap/memtable.h @@ -35,7 +35,6 @@ class RowsetWriter; class Schema; class SlotDescriptor; class TabletSchema; -class Tuple; class TupleDescriptor; class MemTable { diff --git a/be/src/olap/predicate_creator.h b/be/src/olap/predicate_creator.h index c12e155e0f..35970de723 100644 --- a/be/src/olap/predicate_creator.h +++ b/be/src/olap/predicate_creator.h @@ -23,11 +23,11 @@ #include "exec/olap_utils.h" #include "exprs/create_predicate_function.h" #include "exprs/hybrid_set.h" -#include "exprs/match_predicate.h" #include "olap/bloom_filter_predicate.h" #include "olap/column_predicate.h" #include "olap/comparison_predicate.h" #include "olap/in_list_predicate.h" +#include "olap/match_predicate.h" #include "olap/null_predicate.h" #include "olap/tablet_schema.h" #include "runtime/define_primitive_type.h" diff --git a/be/src/runtime/collection_value.cpp b/be/src/runtime/collection_value.cpp index fc9fb65a24..593a431fb2 100644 --- a/be/src/runtime/collection_value.cpp +++ b/be/src/runtime/collection_value.cpp @@ -29,436 +29,6 @@ namespace doris { -template -struct CollectionValueSubTypeTrait; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = int8_t; // slot size : 1 -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = bool; - using AnyValType = BooleanVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = int8_t; - using AnyValType = TinyIntVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = int16_t; - using AnyValType = SmallIntVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = int32_t; - using AnyValType = IntVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = int64_t; - using AnyValType = BigIntVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = __int128_t; - using AnyValType = LargeIntVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = float; - using AnyValType = FloatVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = double; - using AnyValType = DoubleVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = StringRef; - using AnyValType = StringVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = StringRef; - using AnyValType = StringVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = StringRef; - using AnyValType = StringVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = uint24_t; - using AnyValType = DateTimeVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = uint64_t; - using AnyValType = DateTimeVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = uint32_t; - using AnyValType = IntVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = uint64_t; - using AnyValType = BigIntVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = decimal12_t; - using AnyValType = DecimalV2Val; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = int32_t; - using AnyValType = IntVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = int64_t; - using AnyValType = BigIntVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = int128_t; - using AnyValType = LargeIntVal; -}; - -template <> -struct CollectionValueSubTypeTrait { - using CppType = CollectionValue; - using AnyValType = CollectionVal; -}; - -struct ArrayIteratorFunctionsBase {}; - -template -struct GenericArrayIteratorFunctions : public ArrayIteratorFunctionsBase { - using CppType = typename CollectionValueSubTypeTrait::CppType; - using AnyValType = typename CollectionValueSubTypeTrait::AnyValType; - - constexpr static int get_type_size() { return sizeof(CppType); } - static void shallow_set(void* item, const AnyVal* value) { - *static_cast(item) = static_cast(value)->val; - } - static void shallow_get(AnyVal* value, const void* item) { - static_cast(value)->val = *static_cast(item); - } - static void self_deep_copy(void* item, const TypeDescriptor& type_desc, - const GenMemFootprintFunc& gen_mem_footprint, bool convert_ptrs) {} - static void deserialize(void* item, const char* tuple_data, const TypeDescriptor& type_desc) {} - static size_t get_byte_size(const void* item, const TypeDescriptor& type_desc) { return 0; } - static void raw_value_write(void* item, const void* value, const TypeDescriptor& type_desc, - MemPool* pool) { - RawValue::write(value, item, type_desc, pool); - } -}; - -template -struct ArrayIteratorFunctions : public GenericArrayIteratorFunctions {}; - -template -struct ArrayIteratorFunctionsForString : public GenericArrayIteratorFunctions { - using CppType = StringRef; - using AnyValType = StringVal; - - static void shallow_set(void* item, const AnyVal* value) { - const auto* src = static_cast(value); - auto* dst = static_cast(item); - dst->data = convert_to(src->ptr); - dst->size = src->len; - } - static void shallow_get(AnyVal* value, const void* item) { - const auto* src = static_cast(item); - auto* dst = static_cast(value); - dst->ptr = convert_to(src->data); - dst->len = src->size; - } - static void self_deep_copy(void* item, const TypeDescriptor&, - const GenMemFootprintFunc& gen_mem_footprint, bool convert_ptrs) { - auto* string = static_cast(item); - if (!string->size) { - return; - } - MemFootprint footprint = gen_mem_footprint(string->size); - int64_t offset = footprint.first; - auto* copied_string = reinterpret_cast(footprint.second); - memory_copy(copied_string, string->data, string->size); - string->data = (convert_ptrs ? convert_to(offset) : copied_string); - } - static void deserialize(void* item, const char* tuple_data, const TypeDescriptor& type_desc) { - DCHECK((item != nullptr) && (tuple_data != nullptr)) << "item or tuple_data is nullptr"; - auto* string_value = static_cast(item); - if (string_value->size) { - int64_t offset = convert_to(string_value->data); - string_value->data = convert_to(tuple_data + offset); - } - } - static size_t get_byte_size(const void* item, const TypeDescriptor&) { - return static_cast(item)->size; - } -}; - -template <> -struct ArrayIteratorFunctions : public ArrayIteratorFunctionsForString {}; -template <> -struct ArrayIteratorFunctions : public ArrayIteratorFunctionsForString { -}; -template <> -struct ArrayIteratorFunctions : public ArrayIteratorFunctionsForString {}; - -template <> -struct ArrayIteratorFunctions : public GenericArrayIteratorFunctions { - using GenericArrayIteratorFunctions::CppType; - using GenericArrayIteratorFunctions::AnyValType; - - static void shallow_set(void* item, const AnyVal* value) { - const auto* src = static_cast(value); - auto* dst = static_cast(item); - *dst = DateTimeValue::from_datetime_val(*src).to_olap_date(); - } - static void shallow_get(AnyVal* value, const void* item) { - const auto* src = static_cast(item); - auto* dst = static_cast(value); - DateTimeValue data; - data.from_olap_date(uint32_t(*src)); - data.to_datetime_val(dst); - } - static void raw_value_write(void* item, const void* value, const TypeDescriptor& type_desc, - MemPool* pool) { - DateTimeVal date_time_val; - shallow_get(&date_time_val, value); - shallow_set(item, &date_time_val); - } -}; -template <> -struct ArrayIteratorFunctions : public GenericArrayIteratorFunctions { - using GenericArrayIteratorFunctions::CppType; - using GenericArrayIteratorFunctions::AnyValType; - - static void shallow_set(void* item, const AnyVal* value) { - const auto* src = static_cast(value); - auto* dst = static_cast(item); - *dst = DateTimeValue::from_datetime_val(*src).to_olap_datetime(); - } - static void shallow_get(AnyVal* value, const void* item) { - const auto* src = static_cast(item); - auto* dst = static_cast(value); - DateTimeValue data; - data.from_olap_datetime(*src); - data.to_datetime_val(dst); - } - static void raw_value_write(void* item, const void* value, const TypeDescriptor& type_desc, - MemPool* pool) { - DateTimeVal date_time_val; - shallow_get(&date_time_val, value); - shallow_set(item, &date_time_val); - } -}; - -template <> -struct ArrayIteratorFunctions - : public GenericArrayIteratorFunctions { - using GenericArrayIteratorFunctions::CppType; - using GenericArrayIteratorFunctions::AnyValType; - - static void shallow_set(void* item, const AnyVal* value) { - const auto* src = static_cast(value); - auto* dst = static_cast(item); - auto decimal_value = DecimalV2Value::from_decimal_val(*src); - dst->integer = decimal_value.int_value(); - dst->fraction = decimal_value.frac_value(); - } - static void shallow_get(AnyVal* value, const void* item) { - const auto* src = static_cast(item); - auto* dst = static_cast(value); - DecimalV2Value(src->integer, src->fraction).to_decimal_val(dst); - } - static void raw_value_write(void* item, const void* value, const TypeDescriptor& type_desc, - MemPool* pool) { - DecimalV2Val decimal_val; - shallow_get(&decimal_val, value); - shallow_set(item, &decimal_val); - } -}; - -template <> -struct ArrayIteratorFunctions : public GenericArrayIteratorFunctions { - using GenericArrayIteratorFunctions::CppType; - using GenericArrayIteratorFunctions::AnyValType; - - static void shallow_set(void* item, const AnyVal* value) { - *static_cast(item) = - CppType::from_collection_val(*static_cast(value)); - } - static void shallow_get(AnyVal* value, const void* item) { - static_cast(item)->to_collection_val(static_cast(value)); - } - static void self_deep_copy(void* item, const TypeDescriptor& type_desc, - const GenMemFootprintFunc& gen_mem_footprint, bool convert_ptrs) { - auto* collection_value = static_cast(item); - CollectionValue::deep_copy_collection(collection_value, type_desc.children[0], - gen_mem_footprint, convert_ptrs); - } - static void deserialize(void* item, const char* tuple_data, const TypeDescriptor& type_desc) { - CollectionValue::deserialize_collection(static_cast(item), tuple_data, - type_desc.children[0]); - } - static size_t get_byte_size(const void* item, const TypeDescriptor& type_desc) { - const auto* collection_value = static_cast(item); - return collection_value->get_byte_size(type_desc.children[0]); - } -}; - -ArrayIterator CollectionValue::iterator(PrimitiveType child_type) { - return internal_iterator(child_type); -} - -ArrayIterator CollectionValue::internal_iterator(PrimitiveType child_type) const { - switch (child_type) { - case TYPE_BOOLEAN: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_TINYINT: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_SMALLINT: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_INT: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_BIGINT: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_LARGEINT: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_FLOAT: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_DOUBLE: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_CHAR: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_VARCHAR: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_STRING: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_DATE: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_DATETIME: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_DATEV2: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_DATETIMEV2: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_ARRAY: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_DECIMALV2: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_DECIMAL32: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_DECIMAL64: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - case TYPE_DECIMAL128I: - return ArrayIterator(const_cast(this), - static_cast*>(nullptr)); - default: - DCHECK(false) << "Invalid child type: " << child_type; - __builtin_unreachable(); - } -} - -const ArrayIterator CollectionValue::iterator(PrimitiveType child_type) const { - return internal_iterator(child_type); -} - -Status type_check(PrimitiveType type) { - switch (type) { - case TYPE_NULL: - - case TYPE_BOOLEAN: - - case TYPE_TINYINT: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - case TYPE_LARGEINT: - - case TYPE_FLOAT: - case TYPE_DOUBLE: - - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_STRING: - - case TYPE_DATE: - case TYPE_DATETIME: - case TYPE_DATEV2: - case TYPE_DATETIMEV2: - - case TYPE_DECIMALV2: - case TYPE_DECIMAL32: - case TYPE_DECIMAL64: - case TYPE_DECIMAL128I: - - case TYPE_ARRAY: - break; - default: - return Status::InvalidArgument("Type not implemented: {}", type); - } - return Status::OK(); -} - -int sizeof_type(PrimitiveType type) { - if (type_check(type).ok()) { - return CollectionValue().iterator(type).type_size(); - } else { - DCHECK(false) << "Type not implemented: " << type; - return 0; - } -} - void CollectionValue::to_collection_val(CollectionVal* val) const { val->length = _length; val->data = _data; @@ -481,132 +51,4 @@ void CollectionValue::copy_null_signs(const CollectionValue* other) { } } -size_t CollectionValue::get_byte_size(const TypeDescriptor& item_type) const { - size_t result = 0; - if (_length == 0) { - return result; - } - if (_has_null) { - result += _length * sizeof(bool); - } - auto iterator = CollectionValue::iterator(item_type.type); - result += _length * iterator.type_size(); - - while (!iterator.is_type_fixed_width() && iterator.has_next()) { - result += iterator.get_byte_size(item_type); - iterator.next(); - } - return result; -} - -Status CollectionValue::init_collection(ObjectPool* pool, uint64_t size, PrimitiveType child_type, - CollectionValue* value) { - return init_collection( - value, [pool](size_t size) -> uint8_t* { return pool->add_array(new uint8_t[size]); }, - size, child_type); -} - -Status CollectionValue::init_collection(CollectionValue* value, const AllocateMemFunc& allocate, - uint64_t size, PrimitiveType child_type) { - if (value == nullptr) { - return Status::InvalidArgument("collection value is null"); - } - - RETURN_IF_ERROR(type_check(child_type)); - - if (size == 0) { - new (value) CollectionValue(size); - return Status::OK(); - } - - value->_data = allocate(size * sizeof_type(child_type)); - value->_length = size; - value->_has_null = false; - value->_null_signs = reinterpret_cast(allocate(size)); - memset(value->_null_signs, 0, size * sizeof(bool)); - - return Status::OK(); -} - -Status CollectionValue::init_collection(MemPool* pool, uint64_t size, PrimitiveType child_type, - CollectionValue* value) { - return init_collection( - value, [pool](size_t size) { return pool->allocate_aligned(size, 16); }, size, - child_type); -} - -Status CollectionValue::init_collection(FunctionContext* context, uint64_t size, - PrimitiveType child_type, CollectionValue* value) { - return init_collection( - value, [context](size_t size) { return context->aligned_allocate(16, size); }, size, - child_type); -} - -CollectionValue CollectionValue::from_collection_val(const CollectionVal& val) { - return CollectionValue(val.data, val.length, val.has_null, val.null_signs); -} - -// Deep copy collection. -// NOTICE: The CollectionValue* shallow_copied_cv must be initialized by calling memcpy function first ( -// copy data from origin collection value). -void CollectionValue::deep_copy_collection(CollectionValue* shallow_copied_cv, - const TypeDescriptor& item_type, - const GenMemFootprintFunc& gen_mem_footprint, - bool convert_ptrs) { - CollectionValue* cv = shallow_copied_cv; - if (cv->length() == 0) { - return; - } - - auto iterator = cv->iterator(item_type.type); - uint64_t coll_byte_size = cv->length() * iterator.type_size(); - uint64_t nulls_size = cv->has_null() ? cv->length() * sizeof(bool) : 0; - - MemFootprint footprint = gen_mem_footprint(coll_byte_size + nulls_size); - int64_t offset = footprint.first; - char* coll_data = reinterpret_cast(footprint.second); - - // copy and assign null_signs - if (cv->has_null()) { - memory_copy(convert_to(coll_data), cv->null_signs(), nulls_size); - cv->set_null_signs(convert_to(coll_data)); - } else { - cv->set_null_signs(nullptr); - } - // copy and assign data - memory_copy(coll_data + nulls_size, cv->data(), coll_byte_size); - cv->set_data(coll_data + nulls_size); - - while (!iterator.is_type_fixed_width() && iterator.has_next()) { - iterator.self_deep_copy(item_type, gen_mem_footprint, convert_ptrs); - iterator.next(); - } - - if (convert_ptrs) { - cv->set_data(convert_to(offset + nulls_size)); - if (cv->has_null()) { - cv->set_null_signs(convert_to(offset)); - } - } -} - -void CollectionValue::deserialize_collection(CollectionValue* cv, const char* tuple_data, - const TypeDescriptor& item_type) { - if (cv->length() == 0) { - new (cv) CollectionValue(cv->length()); - return; - } - // assign data and null_sign pointer position in tuple_data - int64_t data_offset = convert_to(cv->data()); - cv->set_data(convert_to(tuple_data + data_offset)); - if (cv->has_null()) { - int64_t null_offset = convert_to(cv->null_signs()); - cv->set_null_signs(convert_to(tuple_data + null_offset)); - } - auto iterator = cv->iterator(item_type.type); - while (!iterator.is_type_fixed_width() && iterator.has_next()) { - iterator.deserialize(tuple_data, item_type); - iterator.next(); - } -} } // namespace doris diff --git a/be/src/runtime/collection_value.h b/be/src/runtime/collection_value.h index 3fac161503..a64da20623 100644 --- a/be/src/runtime/collection_value.h +++ b/be/src/runtime/collection_value.h @@ -88,36 +88,6 @@ public: void copy_null_signs(const CollectionValue* other); - size_t get_byte_size(const TypeDescriptor& item_type) const; - - ArrayIterator iterator(PrimitiveType child_type); - const ArrayIterator iterator(PrimitiveType child_type) const; - - /** - * init collection, will alloc (children Type's size + 1) * (children Nums) memory - */ - static Status init_collection(ObjectPool* pool, uint64_t size, PrimitiveType child_type, - CollectionValue* value); - - static Status init_collection(MemPool* pool, uint64_t size, PrimitiveType child_type, - CollectionValue* value); - - static Status init_collection(FunctionContext* context, uint64_t size, PrimitiveType child_type, - CollectionValue* value); - - static CollectionValue from_collection_val(const CollectionVal& val); - - // Deep copy collection. - // NOTICE: The CollectionValue* shallow_copied_cv must be initialized by calling memcpy function first ( - // copy data from origin collection value). - static void deep_copy_collection(CollectionValue* shallow_copied_cv, - const TypeDescriptor& item_type, - const GenMemFootprintFunc& gen_mem_footprint, - bool convert_ptrs); - - static void deserialize_collection(CollectionValue* cv, const char* tuple_data, - const TypeDescriptor& item_type); - const void* data() const { return _data; } bool has_null() const { return _has_null; } const bool* null_signs() const { return _null_signs; } @@ -128,12 +98,6 @@ public: void set_data(void* data) { _data = data; } void set_null_signs(bool* null_signs) { _null_signs = null_signs; } -private: - using AllocateMemFunc = std::function; - static Status init_collection(CollectionValue* value, const AllocateMemFunc& allocate, - uint64_t size, PrimitiveType child_type); - ArrayIterator internal_iterator(PrimitiveType child_type) const; - private: // child column data void* _data; @@ -143,115 +107,5 @@ private: bool _has_null; // null bitmap bool* _null_signs; - - friend ArrayIterator; -}; - -class ArrayIterator { -public: - int type_size() const { return _type_size; } - bool is_type_fixed_width() const { return _is_type_fixed_width; } - - bool has_next() const { return _offset < _collection_value->size(); } - bool next() const { - if (has_next()) { - ++_offset; - return true; - } - return false; - } - bool seek(uint64_t n) const { - if (n >= _collection_value->size()) { - return false; - } - _offset = n; - return true; - } - bool is_null() const { return _collection_value->is_null_at(_offset); } - const void* get() const { - if (is_null()) { - return nullptr; - } - return reinterpret_cast(_collection_value->data()) + _offset * _type_size; - } - void* get() { - if (is_null()) { - return nullptr; - } - return reinterpret_cast(_collection_value->mutable_data()) + _offset * _type_size; - } - void get(AnyVal* value) const { - if (is_null()) { - value->is_null = true; - return; - } - value->is_null = false; - _shallow_get(value, get()); - } - void set(const AnyVal* value) { - if (_collection_value->mutable_null_signs()) { - _collection_value->mutable_null_signs()[_offset] = value->is_null; - } - if (value->is_null) { - _collection_value->set_has_null(true); - } else { - _shallow_set(get(), value); - } - } - void self_deep_copy(const TypeDescriptor& type_desc, - const GenMemFootprintFunc& gen_mem_footprint, bool convert_ptrs) { - if (is_null()) { - return; - } - _self_deep_copy(get(), type_desc, gen_mem_footprint, convert_ptrs); - } - void deserialize(const char* tuple_data, const TypeDescriptor& type_desc) { - if (is_null()) { - return; - } - _deserialize(get(), tuple_data, type_desc); - } - size_t get_byte_size(const TypeDescriptor& type) const { - if (is_null()) { - return 0; - } - return _get_byte_size(get(), type); - } - void raw_value_write(const void* value, const TypeDescriptor& type_desc, MemPool* pool) { - if (is_null()) { - return; - } - return _raw_value_write(get(), value, type_desc, pool); - } - -private: - template >> - ArrayIterator(CollectionValue* data, const T*) - : _shallow_get(T::shallow_get), - _shallow_set(T::shallow_set), - _self_deep_copy(T::self_deep_copy), - _deserialize(T::deserialize), - _get_byte_size(T::get_byte_size), - _raw_value_write(T::raw_value_write), - _collection_value(data), - _offset(0), - _type_size(T::get_type_size()), - _is_type_fixed_width(IsTypeFixedWidth) {} - void (*_shallow_get)(AnyVal*, const void*); - void (*_shallow_set)(void*, const AnyVal*); - void (*_self_deep_copy)(void*, const TypeDescriptor&, const GenMemFootprintFunc&, bool); - void (*_deserialize)(void*, const char*, const TypeDescriptor&); - size_t (*_get_byte_size)(const void* item, const TypeDescriptor&); - void (*_raw_value_write)(void* item, const void* value, const TypeDescriptor& type_desc, - MemPool* pool); - -private: - CollectionValue* _collection_value; - mutable uint64_t _offset; - const int _type_size; - const bool _is_type_fixed_width; - - friend CollectionValue; }; } // namespace doris diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index a5af1569bc..b2fbb8db8f 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -86,56 +86,6 @@ PrimitiveType convert_type_to_primitive(FunctionContext::Type type) { return PrimitiveType::INVALID_TYPE; } -// Returns the byte size of 'type' Returns 0 for variable length types. -int get_byte_size(PrimitiveType type) { - switch (type) { - case TYPE_VARCHAR: - case TYPE_STRING: - case TYPE_OBJECT: - case TYPE_HLL: - case TYPE_QUANTILE_STATE: - case TYPE_ARRAY: - case TYPE_MAP: - return 0; - - case TYPE_NULL: - case TYPE_BOOLEAN: - case TYPE_TINYINT: - return 1; - - case TYPE_SMALLINT: - return 2; - - case TYPE_INT: - case TYPE_FLOAT: - case TYPE_DECIMAL32: - return 4; - - case TYPE_BIGINT: - case TYPE_DOUBLE: - case TYPE_TIME: - case TYPE_DECIMAL64: - return 8; - - case TYPE_DATETIME: - case TYPE_DATE: - case TYPE_LARGEINT: - case TYPE_DECIMALV2: - case TYPE_DECIMAL128I: - return 16; - - case INVALID_TYPE: - // datev2/datetimev2/timev2 is not supported on row-based engine - case TYPE_DATEV2: - case TYPE_DATETIMEV2: - case TYPE_TIMEV2: - default: - DCHECK(false); - } - - return 0; -} - bool is_type_compatible(PrimitiveType lhs, PrimitiveType rhs) { if (lhs == TYPE_VARCHAR) { return rhs == TYPE_CHAR || rhs == TYPE_VARCHAR || rhs == TYPE_HLL || rhs == TYPE_OBJECT || diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h index 96272edc71..bae0d8c1f5 100644 --- a/be/src/runtime/primitive_type.h +++ b/be/src/runtime/primitive_type.h @@ -96,8 +96,6 @@ constexpr bool has_variable_type(PrimitiveType type) { type == TYPE_QUANTILE_STATE || type == TYPE_STRING; } -// Returns the byte size of 'type' Returns 0 for variable length types. -int get_byte_size(PrimitiveType type); // Returns the byte size of type when in a tuple int get_slot_size(PrimitiveType type); diff --git a/be/src/runtime/raw_value.cpp b/be/src/runtime/raw_value.cpp index 7f2896ee19..5935a1fe89 100644 --- a/be/src/runtime/raw_value.cpp +++ b/be/src/runtime/raw_value.cpp @@ -33,522 +33,6 @@ namespace doris { const int RawValue::ASCII_PRECISION = 16; // print 16 digits for double/float -void RawValue::print_value_as_bytes(const void* value, const TypeDescriptor& type, - std::stringstream* stream) { - if (value == nullptr) { - return; - } - - const char* chars = reinterpret_cast(value); - const StringRef* string_val = nullptr; - - switch (type.type) { - case TYPE_NULL: - break; - case TYPE_BOOLEAN: - stream->write(chars, sizeof(bool)); - return; - - case TYPE_TINYINT: - stream->write(chars, sizeof(int8_t)); - break; - - case TYPE_SMALLINT: - stream->write(chars, sizeof(int16_t)); - break; - - case TYPE_INT: - stream->write(chars, sizeof(int32_t)); - break; - - case TYPE_BIGINT: - stream->write(chars, sizeof(int64_t)); - break; - - case TYPE_FLOAT: - stream->write(chars, sizeof(float)); - break; - - case TYPE_DOUBLE: - stream->write(chars, sizeof(double)); - break; - - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_CHAR: - case TYPE_STRING: - string_val = reinterpret_cast(value); - stream->write(const_cast(string_val->data), string_val->size); - return; - - case TYPE_DATE: - case TYPE_DATETIME: - stream->write(chars, sizeof(DateTimeValue)); - break; - - case TYPE_DATEV2: - stream->write(chars, - sizeof(doris::vectorized::DateV2Value)); - break; - - case TYPE_DATETIMEV2: - stream->write( - chars, - sizeof(doris::vectorized::DateV2Value)); - break; - - case TYPE_DECIMALV2: - stream->write(chars, sizeof(DecimalV2Value)); - break; - - case TYPE_DECIMAL32: - stream->write(chars, 4); - break; - - case TYPE_DECIMAL64: - stream->write(chars, 8); - break; - - case TYPE_DECIMAL128I: - stream->write(chars, 16); - break; - - case TYPE_LARGEINT: - stream->write(chars, sizeof(__int128)); - break; - - default: - DCHECK(false) << "bad RawValue::print_value() type: " << type; - } -} - -void RawValue::print_value(const void* value, const TypeDescriptor& type, int scale, - std::stringstream* stream) { - if (value == nullptr) { - *stream << "NULL"; - return; - } - - int old_precision = stream->precision(); - std::ios_base::fmtflags old_flags = stream->flags(); - - if (scale > -1) { - stream->precision(scale); - // Setting 'fixed' causes precision to set the number of digits printed after the - // decimal (by default it sets the maximum number of digits total). - *stream << std::fixed; - } - - std::string tmp; - const StringRef* string_val = nullptr; - - switch (type.type) { - case TYPE_BOOLEAN: { - bool val = *reinterpret_cast(value); - *stream << (val ? "true" : "false"); - return; - } - - case TYPE_TINYINT: - // Extra casting for chars since they should not be interpreted as ASCII. - *stream << static_cast(*reinterpret_cast(value)); - break; - - case TYPE_SMALLINT: - *stream << *reinterpret_cast(value); - break; - - case TYPE_INT: - *stream << *reinterpret_cast(value); - break; - - case TYPE_BIGINT: - *stream << *reinterpret_cast(value); - break; - - case TYPE_FLOAT: - *stream << *reinterpret_cast(value); - break; - - case TYPE_DOUBLE: - *stream << *reinterpret_cast(value); - break; - case TYPE_HLL: - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_STRING: - string_val = reinterpret_cast(value); - tmp.assign(const_cast(string_val->data), string_val->size); - *stream << tmp; - return; - - case TYPE_DATE: - case TYPE_DATETIME: - *stream << *reinterpret_cast(value); - break; - - case TYPE_DATEV2: - *stream << *reinterpret_cast< - const doris::vectorized::DateV2Value*>(value); - break; - - case TYPE_DATETIMEV2: - *stream << *reinterpret_cast< - const doris::vectorized::DateV2Value*>( - value); - break; - - case TYPE_DECIMALV2: - *stream << DecimalV2Value(reinterpret_cast(value)->value).to_string(); - break; - - case TYPE_DECIMAL32: { - auto decimal_val = reinterpret_cast(value); - write_text(*decimal_val, type.scale, *stream); - break; - } - - case TYPE_DECIMAL64: { - auto decimal_val = reinterpret_cast(value); - write_text(*decimal_val, type.scale, *stream); - break; - } - - case TYPE_DECIMAL128I: { - auto decimal_val = reinterpret_cast(value); - write_text(*decimal_val, type.scale, *stream); - break; - } - - case TYPE_LARGEINT: - *stream << reinterpret_cast(value)->value; - break; - - case TYPE_ARRAY: { - auto child_type = type.children[0]; - auto array_value = (const CollectionValue*)(value); - - ArrayIterator iter = array_value->iterator(child_type.type); - *stream << "["; - - int begin = 0; - while (iter.has_next()) { - if (begin != 0) { - *stream << ", "; - } - if (!iter.get()) { - *stream << "NULL"; - } else { - if (child_type.is_string_type()) { - *stream << "'"; - print_value(iter.get(), child_type, scale, stream); - *stream << "'"; - } else if (child_type.is_date_type()) { - DateTimeVal data; - iter.get(&data); - auto datetime_value = DateTimeValue::from_datetime_val(data); - print_value(&datetime_value, child_type, scale, stream); - } else if (child_type.is_decimal_v2_type()) { - DecimalV2Val data; - iter.get(&data); - auto decimal_value = DecimalV2Value::from_decimal_val(data); - print_value(&decimal_value, child_type, scale, stream); - } else if (child_type.type == TYPE_DOUBLE) { - // Note: the default precision is 6, here should be reset to 15. - // Otherwise, there is a risk of losing precision. - stream->precision(15); - print_value(iter.get(), child_type, scale, stream); - } else { - print_value(iter.get(), child_type, scale, stream); - } - } - - iter.next(); - begin++; - } - *stream << "]"; - break; - } - - default: - DCHECK(false) << "bad RawValue::print_value() type: " << type; - } - - stream->precision(old_precision); - // Undo setting stream to fixed - stream->flags(old_flags); -} - -void RawValue::print_value(const void* value, const TypeDescriptor& type, int scale, - std::string* str) { - if (value == nullptr) { - *str = "NULL"; - return; - } - - std::stringstream out; - out.precision(ASCII_PRECISION); - const StringRef* string_val = nullptr; - std::string tmp; - bool val = false; - - // Special case types that we can print more efficiently without using a std::stringstream - switch (type.type) { - case TYPE_BOOLEAN: - val = *reinterpret_cast(value); - *str = (val ? "true" : "false"); - return; - - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_OBJECT: - case TYPE_HLL: - case TYPE_QUANTILE_STATE: - case TYPE_STRING: { - string_val = reinterpret_cast(value); - std::stringstream ss; - ss << "ptr:" << (void*)string_val->data << " len:" << string_val->size; - tmp = ss.str(); - if (string_val->size <= 1000) { - tmp.assign(const_cast(string_val->data), string_val->size); - } - str->swap(tmp); - return; - } - case TYPE_NULL: { - *str = "NULL"; - return; - } - default: - print_value(value, type, scale, &out); - } - - *str = out.str(); -} - -void RawValue::write(const void* value, void* dst, const TypeDescriptor& type, MemPool* pool) { - DCHECK(value != nullptr); - - switch (type.type) { - case TYPE_NULL: - break; - case TYPE_BOOLEAN: { - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - } - - case TYPE_TINYINT: { - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - } - - case TYPE_SMALLINT: { - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - } - - case TYPE_INT: { - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - } - - case TYPE_BIGINT: { - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - } - - case TYPE_LARGEINT: { - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - } - - case TYPE_FLOAT: { - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - } - - case TYPE_TIME: - case TYPE_DOUBLE: { - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - } - - case TYPE_DATE: - case TYPE_DATETIME: - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - - case TYPE_DATEV2: - *reinterpret_cast*>( - dst) = - *reinterpret_cast< - const doris::vectorized::DateV2Value*>( - value); - break; - - case TYPE_DATETIMEV2: - *reinterpret_cast*>( - dst) = - *reinterpret_cast*>(value); - break; - - case TYPE_DECIMALV2: - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - - case TYPE_DECIMAL32: - *reinterpret_cast(dst) = - *reinterpret_cast(value); - break; - case TYPE_DECIMAL64: - *reinterpret_cast(dst) = - *reinterpret_cast(value); - break; - case TYPE_DECIMAL128I: - *reinterpret_cast(dst) = - *reinterpret_cast(value); - break; - - case TYPE_OBJECT: - case TYPE_HLL: - case TYPE_QUANTILE_STATE: - case TYPE_VARCHAR: - case TYPE_CHAR: - case TYPE_STRING: { - const StringRef* src = reinterpret_cast(value); - StringRef* dest = reinterpret_cast(dst); - dest->size = src->size; - - if (pool != nullptr) { - dest->data = reinterpret_cast(pool->allocate(dest->size)); - memcpy(const_cast(dest->data), src->data, dest->size); - } else { - dest->data = src->data; - } - - break; - } - case TYPE_ARRAY: { - DCHECK_EQ(type.children.size(), 1); - - const CollectionValue* src = reinterpret_cast(value); - CollectionValue* val = reinterpret_cast(dst); - - if (pool != nullptr) { - const auto& item_type = type.children[0]; - CollectionValue::init_collection(pool, src->size(), item_type.type, val); - ArrayIterator src_iter = src->iterator(item_type.type); - ArrayIterator val_iter = val->iterator(item_type.type); - - val->set_has_null(src->has_null()); - val->copy_null_signs(src); - - while (src_iter.has_next() && val_iter.has_next()) { - val_iter.raw_value_write(src_iter.get(), item_type, pool); - src_iter.next(); - val_iter.next(); - } - } else { - val->shallow_copy(src); - } - break; - } - default: - DCHECK(false) << "RawValue::write(): bad type: " << type; - } -} - -// TODO: can we remove some of this code duplication? Templated allocator? -void RawValue::write(const void* value, const TypeDescriptor& type, void* dst, uint8_t** buf) { - DCHECK(value != nullptr); - switch (type.type) { - case TYPE_BOOLEAN: - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - case TYPE_TINYINT: - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - case TYPE_SMALLINT: - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - case TYPE_INT: - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - case TYPE_BIGINT: - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - case TYPE_LARGEINT: - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - case TYPE_FLOAT: - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - case TYPE_DOUBLE: - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - case TYPE_DATE: - case TYPE_DATETIME: - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - case TYPE_DATEV2: - *reinterpret_cast*>( - dst) = - *reinterpret_cast< - const doris::vectorized::DateV2Value*>( - value); - break; - case TYPE_DATETIMEV2: - *reinterpret_cast*>( - dst) = - *reinterpret_cast*>(value); - break; - case TYPE_VARCHAR: - case TYPE_CHAR: - case TYPE_STRING: { - DCHECK(buf != nullptr); - const StringRef* src = reinterpret_cast(value); - StringRef* dest = reinterpret_cast(dst); - dest->size = src->size; - dest->data = reinterpret_cast(*buf); - memcpy(const_cast(dest->data), src->data, dest->size); - *buf += dest->size; - break; - } - - case TYPE_DECIMALV2: - *reinterpret_cast(dst) = *reinterpret_cast(value); - break; - - case TYPE_DECIMAL32: - *reinterpret_cast(dst) = - *reinterpret_cast(value); - break; - case TYPE_DECIMAL64: - *reinterpret_cast(dst) = - *reinterpret_cast(value); - break; - case TYPE_DECIMAL128I: - *reinterpret_cast(dst) = - *reinterpret_cast(value); - break; - - default: - DCHECK(false) << "RawValue::write(): bad type: " << type.debug_string(); - } -} - -void RawValue::write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc, - MemPool* pool) { - if (value == nullptr) { - tuple->set_null(slot_desc->null_indicator_offset()); - } else { - void* slot = tuple->get_slot(slot_desc->tuple_offset()); - RawValue::write(value, slot, slot_desc->type(), pool); - } -} - int RawValue::compare(const void* v1, const void* v2, const TypeDescriptor& type) { const StringRef* string_value1; const StringRef* string_value2; diff --git a/be/src/runtime/raw_value.h b/be/src/runtime/raw_value.h index 3e1e05a496..551b5fd488 100644 --- a/be/src/runtime/raw_value.h +++ b/be/src/runtime/raw_value.h @@ -41,20 +41,6 @@ public: // Ascii output precision for double/float static const int ASCII_PRECISION; - // Convert 'value' into ascii and write to 'stream'. nullptr turns into NULL. 'scale' - // determines how many digits after the decimal are printed for floating point numbers, - // -1 indicates to use the stream's current formatting. - static void print_value(const void* value, const TypeDescriptor& type, int scale, - std::stringstream* stream); - - // write ascii value to string instead of stringstream. - static void print_value(const void* value, const TypeDescriptor& type, int scale, - std::string* str); - - // Writes the byte representation of a value to a stringstream character-by-character - static void print_value_as_bytes(const void* value, const TypeDescriptor& type, - std::stringstream* stream); - static uint32_t get_hash_value(const void* value, const PrimitiveType& type) { return get_hash_value(value, type, 0); } @@ -98,22 +84,6 @@ public: // Return value is < 0 if v1 < v2, 0 if v1 == v2, > 0 if v1 > v2. static int compare(const void* v1, const void* v2, const TypeDescriptor& type); - // Writes the bytes of a given value into the slot of a tuple. - // For string values, the string data is copied into memory allocated from 'pool' - // only if pool is non-nullptr. - static void write(const void* value, Tuple* tuple, const SlotDescriptor* slot_desc, - MemPool* pool); - - // Writes 'src' into 'dst' for type. - // For string values, the string data is copied into 'pool' if pool is non-nullptr. - // src must be non-nullptr. - static void write(const void* src, void* dst, const TypeDescriptor& type, MemPool* pool); - - // Writes 'src' into 'dst' for type. - // String values are copied into *buffer and *buffer is updated by the length. *buf - // must be preallocated to be large enough. - static void write(const void* src, const TypeDescriptor& type, void* dst, uint8_t** buf); - // Returns true if v1 == v2. // This is more performant than compare() == 0 for string equality, mostly because of // the length comparison check. diff --git a/be/src/runtime/result_buffer_mgr.cpp b/be/src/runtime/result_buffer_mgr.cpp index e1d3069459..51d1878114 100644 --- a/be/src/runtime/result_buffer_mgr.cpp +++ b/be/src/runtime/result_buffer_mgr.cpp @@ -29,12 +29,6 @@ namespace doris { DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(result_buffer_block_count, MetricUnit::NOUNIT); -//std::size_t hash_value(const TUniqueId& fragment_id) { -// uint32_t value = RawValue::get_hash_value(&fragment_id.lo, TypeDescriptor(TYPE_BIGINT), 0); -// value = RawValue::get_hash_value(&fragment_id.hi, TypeDescriptor(TYPE_BIGINT), value); -// return value; -//} - ResultBufferMgr::ResultBufferMgr() : _stop_background_threads_latch(1) { // Each BufferControlBlock has a limited queue size of 1024, it's not needed to count the // actual size of all BufferControlBlock. diff --git a/be/src/runtime/tuple.cpp b/be/src/runtime/tuple.cpp index 1af5942dda..ec4444639c 100644 --- a/be/src/runtime/tuple.cpp +++ b/be/src/runtime/tuple.cpp @@ -34,10 +34,6 @@ namespace doris { -static void deep_copy_collection_slots(Tuple* shallow_copied_tuple, const TupleDescriptor& desc, - const GenMemFootprintFunc& gen_mem_footprint, - bool convert_ptrs); - int64_t Tuple::total_byte_size(const TupleDescriptor& desc) const { int64_t result = desc.byte_size(); if (!desc.has_varlen_slots()) { @@ -62,62 +58,6 @@ int64_t Tuple::varlen_byte_size(const TupleDescriptor& desc) const { return result; } -Tuple* Tuple::deep_copy(const TupleDescriptor& desc, MemPool* pool, bool convert_ptrs) { - Tuple* result = (Tuple*)(pool->allocate(desc.byte_size())); - deep_copy(result, desc, pool, convert_ptrs); - return result; -} - -void Tuple::deep_copy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool, bool convert_ptrs) { - memory_copy(dst, this, desc.byte_size()); - - // allocate in the same pool and then copy all non-null string slots - for (auto string_slot : desc.string_slots()) { - DCHECK(string_slot->type().is_string_type()); - StringRef* string_v = dst->get_string_slot(string_slot->tuple_offset()); - if (!dst->is_null(string_slot->null_indicator_offset())) { - if (string_v->size != 0) { - int64_t offset = pool->total_allocated_bytes(); - char* string_copy = (char*)(pool->allocate(string_v->size)); - memory_copy(string_copy, string_v->data, string_v->size); - string_v->data = (convert_ptrs ? convert_to(offset) : string_copy); - } - } else { - string_v->data = nullptr; - string_v->size = 0; - } - } - - // copy collection slot - deep_copy_collection_slots( - dst, desc, - [pool](int64_t size) -> MemFootprint { - int64_t offset = pool->total_allocated_bytes(); - uint8_t* data = pool->allocate(size); - return {offset, data}; - }, - convert_ptrs); -} - -// Deep copy collection slots. -// NOTICE: The Tuple* shallow_copied_tuple must be initialized by calling memcpy function first ( -// copy data from origin tuple). -static void deep_copy_collection_slots(Tuple* shallow_copied_tuple, const TupleDescriptor& desc, - const GenMemFootprintFunc& gen_mem_footprint, - bool convert_ptrs) { - for (auto slot_desc : desc.collection_slots()) { - DCHECK(slot_desc->type().is_collection_type()); - if (shallow_copied_tuple->is_null(slot_desc->null_indicator_offset())) { - continue; - } - - // copy collection item - CollectionValue* cv = shallow_copied_tuple->get_collection_slot(slot_desc->tuple_offset()); - CollectionValue::deep_copy_collection(cv, slot_desc->type().children[0], gen_mem_footprint, - convert_ptrs); - } -} - Tuple* Tuple::dcopy_with_new(const TupleDescriptor& desc, MemPool* pool, int64_t* bytes) { Tuple* result = (Tuple*)(pool->allocate(desc.byte_size())); *bytes = dcopy_with_new(result, desc); @@ -160,72 +100,4 @@ int64_t Tuple::release_string(const TupleDescriptor& desc) { return bytes; } -void Tuple::deep_copy(const TupleDescriptor& desc, char** data, int64_t* offset, - bool convert_ptrs) { - Tuple* dst = (Tuple*)(*data); - memory_copy(dst, this, desc.byte_size()); - *data += desc.byte_size(); - *offset += desc.byte_size(); - - for (auto slot_desc : desc.string_slots()) { - DCHECK(slot_desc->type().is_string_type()); - StringRef* string_v = dst->get_string_slot(slot_desc->tuple_offset()); - if (!dst->is_null(slot_desc->null_indicator_offset())) { - memory_copy(*data, string_v->data, string_v->size); - string_v->data = (convert_ptrs ? convert_to(*offset) : *data); - *data += string_v->size; - *offset += string_v->size; - } else { - string_v->data = (convert_ptrs ? convert_to(*offset) : *data); - string_v->size = 0; - } - } - - // copy collection slots - deep_copy_collection_slots( - dst, desc, - [offset, data](int64_t size) -> MemFootprint { - MemFootprint footprint = {*offset, reinterpret_cast(*data)}; - *offset += size; - *data += size; - return footprint; - }, - convert_ptrs); -} - -std::string Tuple::to_string(const TupleDescriptor& d) const { - std::stringstream out; - out << "("; - - bool first_value = true; - for (auto slot : d.slots()) { - if (!slot->is_materialized()) { - continue; - } - if (first_value) { - first_value = false; - } else { - out << " "; - } - - if (is_null(slot->null_indicator_offset())) { - out << "null"; - } else { - std::string value_str; - RawValue::print_value(get_slot(slot->tuple_offset()), slot->type(), -1, &value_str); - out << value_str; - } - } - - out << ")"; - return out.str(); -} - -std::string Tuple::to_string(const Tuple* t, const TupleDescriptor& d) { - if (t == nullptr) { - return "null"; - } - return t->to_string(d); -} - } // namespace doris diff --git a/be/src/runtime/tuple.h b/be/src/runtime/tuple.h index 4bda9003c5..1d226cbb1f 100644 --- a/be/src/runtime/tuple.h +++ b/be/src/runtime/tuple.h @@ -67,43 +67,11 @@ public: // The size of all referenced string and collection data. int64_t varlen_byte_size(const TupleDescriptor& desc) const; - // create a copy of 'this', including all of its referenced string data, - // using pool to allocate memory. Returns the copy. - // If 'convert_ptrs' is true, converts pointers that are part of the tuple - // into offsets in 'pool'. - Tuple* deep_copy(const TupleDescriptor& desc, MemPool* pool, bool convert_ptrs); - - Tuple* deep_copy(const TupleDescriptor& desc, MemPool* pool) { - return deep_copy(desc, pool, false); - } - - // create a copy of 'this', including all its referenced string data. This - // version does not allocate a tuple, instead copying 'dst'. dst must already - // be allocated to the correct size (desc.byte_size()) - // If 'convert_ptrs' is true, converts pointers that are part of the tuple - // into offsets in 'pool'. - void deep_copy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool, bool convert_ptrs); - void deep_copy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool) { - deep_copy(dst, desc, pool, false); - } - // deep copy use 'new', must be 'free' after use Tuple* dcopy_with_new(const TupleDescriptor& desc, MemPool* pool, int64_t* bytes); int64_t dcopy_with_new(Tuple* dst, const TupleDescriptor& desc); int64_t release_string(const TupleDescriptor& desc); - // create a copy of 'this', including all referenced string data, into - // data. The tuple is written first, followed by any strings. data and offset - // will be incremented by the total number of bytes written. data must already - // be allocated to the correct size. - // If 'convert_ptrs' is true, converts pointers that are part of the tuple - // into offsets in data, based on the provided offset. Otherwise they will be - // pointers directly into data. - void deep_copy(const TupleDescriptor& desc, char** data, int64_t* offset, bool convert_ptrs); - void deep_copy(const TupleDescriptor& desc, char** data, int64_t* offset) { - deep_copy(desc, data, offset, false); - } - // Turn null indicator bit on. // Turn null indicator bit on. For non-nullable slots, the mask will be 0 and // this is a no-op (but we don't have to branch to check is slots are nulalble). @@ -166,9 +134,6 @@ public: void* get_data() { return _data; } - std::string to_string(const TupleDescriptor& d) const; - static std::string to_string(const Tuple* t, const TupleDescriptor& d); - private: char _data[0]; }; diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h index aca6336f6b..32d2dd486b 100644 --- a/be/src/runtime/types.h +++ b/be/src/runtime/types.h @@ -201,9 +201,6 @@ struct TypeDescriptor { bool is_bitmap_type() const { return type == TYPE_OBJECT; } - /// Returns the byte size of this type. Returns 0 for variable length types. - int get_byte_size() const { return ::doris::get_byte_size(type); } - int get_slot_size() const { return ::doris::get_slot_size(type); } static inline int get_decimal_byte_size(int precision) { diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index 073e497932..85ffc54135 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -26,7 +26,6 @@ set(UTIL_FILES arrow/row_block.cpp arrow/utils.cpp arrow/block_convertor.cpp - array_parser.cpp bfd_parser.cpp bitmap.cpp block_compression.cpp @@ -58,7 +57,6 @@ set(UTIL_FILES thrift_client.cpp thrift_server.cpp stack_util.cpp - symbols_util.cpp system_metrics.cpp url_parser.cpp url_coding.cpp @@ -105,7 +103,6 @@ set(UTIL_FILES hdfs_storage_backend.cpp hdfs_util.cpp time_lut.cpp - topn_counter.cpp cityhash102/city.cc tuple_row_zorder_compare.cpp telemetry/telemetry.cpp diff --git a/be/src/util/array_parser.cpp b/be/src/util/array_parser.cpp deleted file mode 100644 index 267b2e9896..0000000000 --- a/be/src/util/array_parser.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "util/array_parser.h" - -namespace doris { -std::unordered_map ArrayParser::_types_mapping = { - {FunctionContext::INVALID_TYPE, PrimitiveType::INVALID_TYPE}, - {FunctionContext::TYPE_NULL, PrimitiveType::TYPE_NULL}, - {FunctionContext::TYPE_BOOLEAN, PrimitiveType::TYPE_BOOLEAN}, - {FunctionContext::TYPE_TINYINT, PrimitiveType::TYPE_TINYINT}, - {FunctionContext::TYPE_SMALLINT, PrimitiveType::TYPE_SMALLINT}, - {FunctionContext::TYPE_INT, PrimitiveType::TYPE_INT}, - {FunctionContext::TYPE_BIGINT, PrimitiveType::TYPE_BIGINT}, - {FunctionContext::TYPE_LARGEINT, PrimitiveType::TYPE_LARGEINT}, - {FunctionContext::TYPE_FLOAT, PrimitiveType::TYPE_FLOAT}, - {FunctionContext::TYPE_DOUBLE, PrimitiveType::TYPE_DOUBLE}, - {FunctionContext::TYPE_DATE, PrimitiveType::TYPE_DATE}, - {FunctionContext::TYPE_DATETIME, PrimitiveType::TYPE_DATETIME}, - {FunctionContext::TYPE_CHAR, PrimitiveType::TYPE_CHAR}, - {FunctionContext::TYPE_VARCHAR, PrimitiveType::TYPE_VARCHAR}, - {FunctionContext::TYPE_HLL, PrimitiveType::TYPE_HLL}, - {FunctionContext::TYPE_STRING, PrimitiveType::TYPE_STRING}, - {FunctionContext::TYPE_DECIMALV2, PrimitiveType::TYPE_DECIMALV2}, - {FunctionContext::TYPE_OBJECT, PrimitiveType::TYPE_OBJECT}, - {FunctionContext::TYPE_ARRAY, PrimitiveType::TYPE_ARRAY}, -}; - -} \ No newline at end of file diff --git a/be/src/util/array_parser.h b/be/src/util/array_parser.h deleted file mode 100644 index 6600f6ea9c..0000000000 --- a/be/src/util/array_parser.h +++ /dev/null @@ -1,247 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include - -#include "common/status.h" -#include "runtime/collection_value.h" -#include "runtime/large_int_value.h" -#include "runtime/primitive_type.h" -#include "runtime/types.h" -#include "util/mem_util.hpp" - -namespace doris { - -template -using ConstArray = typename rapidjson::GenericValue::ConstArray; - -template -using ConstArrayIterator = typename ConstArray::ValueIterator; - -class ArrayParser { -public: - static Status parse(CollectionVal& array_val, FunctionContext* context, - const StringVal& str_val) { - rapidjson::Document document; - if (document.Parse(reinterpret_cast(str_val.ptr), str_val.len).HasParseError() || - !document.IsArray()) { - return Status::RuntimeError("Failed to parse the json to array."); - } - if (document.IsNull()) { - array_val = CollectionVal::null(); - return Status::OK(); - } - auto type_desc = _convert_to_type_descriptor(context->get_return_type()); - return _parse>( - array_val, context, - reinterpret_cast(&document)->GetArray(), type_desc); - } - -private: - static TypeDescriptor _convert_to_type_descriptor( - FunctionContext::TypeDesc function_type_desc) { - auto iterator = _types_mapping.find(function_type_desc.type); - if (iterator == _types_mapping.end()) { - return TypeDescriptor(); - } - auto type_desc = TypeDescriptor(iterator->second); - type_desc.len = function_type_desc.len; - type_desc.precision = function_type_desc.precision; - type_desc.scale = function_type_desc.scale; - for (auto child_type_desc : function_type_desc.children) { - type_desc.children.push_back(_convert_to_type_descriptor(child_type_desc)); - } - return type_desc; - } - - template - static Status _parse(CollectionVal& array_val, FunctionContext* context, - const ConstArray& array, const TypeDescriptor& type_desc) { - if (array.Empty()) { - CollectionValue(0).to_collection_val(&array_val); - return Status::OK(); - } - auto child_type_desc = type_desc.children[0]; - auto item_type = child_type_desc.type; - CollectionValue collection_value; - CollectionValue::init_collection(context, array.Size(), item_type, &collection_value); - auto iterator = collection_value.iterator(item_type); - for (auto it = array.Begin(); it != array.End(); ++it, iterator.next()) { - if (it->IsNull()) { - auto null = AnyVal(true); - iterator.set(&null); - continue; - } else if (!_is_type_valid(it, item_type)) { - return Status::RuntimeError("Failed to parse the json to array."); - } - AnyVal* val = nullptr; - Status status = _parse(&val, context, it, child_type_desc); - if (!status.ok()) { - return status; - } - iterator.set(val); - } - collection_value.to_collection_val(&array_val); - return Status::OK(); - } - - template - static bool _is_type_valid(const ConstArrayIterator iterator, - const PrimitiveType type) { - switch (type) { - case TYPE_NULL: - return iterator->IsNull(); - case TYPE_BOOLEAN: - return iterator->IsBool(); - case TYPE_TINYINT: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - case TYPE_FLOAT: - case TYPE_DOUBLE: - return iterator->IsNumber(); - case TYPE_LARGEINT: - return iterator->IsNumber() || iterator->IsString(); - case TYPE_DATE: - case TYPE_DATETIME: - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_STRING: - return iterator->IsString(); - case TYPE_OBJECT: - return iterator->IsObject(); - case TYPE_ARRAY: - return iterator->IsArray(); - case TYPE_DECIMALV2: - return iterator->IsNumber() || iterator->IsString(); - default: - return false; - } - } - - template - static Status _parse(AnyVal** val, FunctionContext* context, - const ConstArrayIterator iterator, - const TypeDescriptor& type_desc) { - switch (type_desc.type) { - case TYPE_ARRAY: - *val = reinterpret_cast(context->allocate(sizeof(CollectionVal))); - new (*val) CollectionVal(); - return _parse(*reinterpret_cast(*val), context, - iterator->GetArray(), type_desc); - case TYPE_BOOLEAN: - *val = reinterpret_cast(context->allocate(sizeof(BooleanVal))); - new (*val) BooleanVal(iterator->GetBool()); - break; - case TYPE_TINYINT: - *val = reinterpret_cast(context->allocate(sizeof(TinyIntVal))); - new (*val) TinyIntVal(iterator->GetInt()); - break; - case TYPE_SMALLINT: - *val = reinterpret_cast(context->allocate(sizeof(SmallIntVal))); - new (*val) SmallIntVal(iterator->GetInt()); - break; - case TYPE_INT: - *val = reinterpret_cast(context->allocate(sizeof(IntVal))); - new (*val) IntVal(iterator->GetInt()); - break; - case TYPE_BIGINT: - *val = reinterpret_cast(context->allocate(sizeof(BigIntVal))); - new (*val) BigIntVal(iterator->GetInt64()); - break; - case TYPE_LARGEINT: { - __int128 value = 0; - if (iterator->IsNumber()) { - if (iterator->IsUint64()) { - value = iterator->GetUint64(); - } else { - return Status::RuntimeError( - "rapidjson can't parse the number larger than Uint64, please use " - "String to parse as LARGEINT"); - } - } else { - std::string_view view(iterator->GetString(), iterator->GetStringLength()); - std::stringstream stream; - stream << view; - stream >> value; - } - *val = reinterpret_cast(context->aligned_allocate(16, sizeof(LargeIntVal))); - new (*val) LargeIntVal(value); - break; - } - case TYPE_FLOAT: - *val = reinterpret_cast(context->allocate(sizeof(FloatVal))); - new (*val) FloatVal(iterator->GetFloat()); - break; - case TYPE_DOUBLE: - *val = reinterpret_cast(context->allocate(sizeof(DoubleVal))); - new (*val) DoubleVal(iterator->GetDouble()); - break; - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_STRING: { - *val = reinterpret_cast(context->allocate(sizeof(StringVal))); - new (*val) StringVal(context->allocate(iterator->GetStringLength()), - iterator->GetStringLength()); - auto string_val = reinterpret_cast(*val); - memory_copy(string_val->ptr, iterator->GetString(), iterator->GetStringLength()); - break; - } - case TYPE_DATE: - case TYPE_DATETIME: { - DateTimeValue value; - value.from_date_str(iterator->GetString(), iterator->GetStringLength()); - *val = reinterpret_cast(context->allocate(sizeof(DateTimeVal))); - new (*val) DateTimeVal(); - value.to_datetime_val(static_cast(*val)); - break; - } - case TYPE_DECIMALV2: { - *val = reinterpret_cast(context->aligned_allocate(16, sizeof(DecimalV2Val))); - new (*val) DecimalV2Val(); - - if (iterator->IsNumber()) { - if (iterator->IsUint64()) { - DecimalV2Value(iterator->GetUint64(), 0) - .to_decimal_val(static_cast(*val)); - } else { - DecimalV2Value value; - value.assign_from_double(iterator->GetDouble()); - value.to_decimal_val(static_cast(*val)); - } - } else { - std::string_view view(iterator->GetString(), iterator->GetStringLength()); - DecimalV2Value(view).to_decimal_val(static_cast(*val)); - } - break; - } - default: - return Status::RuntimeError("Failed to parse json to type ({}).", - std::to_string(type_desc.type)); - } - return Status::OK(); - } - -private: - static std::unordered_map _types_mapping; -}; -} // namespace doris diff --git a/be/src/util/symbols_util.cpp b/be/src/util/symbols_util.cpp deleted file mode 100644 index 7ccd0793b6..0000000000 --- a/be/src/util/symbols_util.cpp +++ /dev/null @@ -1,310 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/symbols-util.cc -// and modified by Doris - -#include "util/symbols_util.h" - -#include - -#include -#include - -namespace doris { -// For the rules about gcc-compatible name mangling, see: -// http://mentorembedded.github.io/cxx-abi/abi.html#mangling -// This implementation *is* not generally compatible. It is hard coded to -// only work with functions that implement the UDF or UDA signature. That is, -// functions of the form: -// namespace::Function(doris_udf::FunctionContext*, const doris_udf::AnyVal&, etc) -// -// The general idea is to walk the types left to right and output them. This happens -// in a single pass. User literals are output as . There are many reserved, -// usually single character tokens for native types and specifying if something is a -// pointer. -// -// One additional piece of complexity is that repeated literals are compressed out. -// As literals are output, they are associated with an ID. The next time that -// we encounter the literal, we output the ID instead. -// We don't implement this generally since the way the literals are added to the -// dictionary is much more general than we need. -// e.g. for the literal ns1::ns2::class::type, -// the dictionary would add 4 literals: 'ns1', 'ns1::ns2', 'ns1::ns2::class', -// 'ns1::ns2::class::type' -// We instead take some shortcuts since we know all the argument types are -// types we define. - -// Mangled symbols must start with this. -const char* MANGLE_PREFIX = "_Z"; - -bool SymbolsUtil::is_mangled(const std::string& symbol) { - return strncmp(symbol.c_str(), MANGLE_PREFIX, strlen(MANGLE_PREFIX)) == 0; -} - -std::string SymbolsUtil::demangle(const std::string& name) { - int status = 0; - char* demangled_name = abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status); - if (status != 0) { - return name; - } - std::string result = demangled_name; - free(demangled_name); - return result; -} - -std::string SymbolsUtil::demangle_no_args(const std::string& symbol) { - std::string fn_name = demangle(symbol); - // Chop off argument list (e.g. "foo(int)" => "foo") - return fn_name.substr(0, fn_name.find('(')); -} - -std::string SymbolsUtil::demangle_name_only(const std::string& symbol) { - std::string fn_name = demangle_no_args(symbol); - // Chop off namespace and/or class name if present (e.g. "doris::foo" => "foo") - // TODO: fix for templates - return fn_name.substr(fn_name.find_last_of(':') + 1); -} - -// Appends to the stream. -// e.g. Hello --> "5Hello" -static void append_mangled_token(const std::string& s, std::stringstream* out) { - DCHECK(!s.empty()); - (*out) << s.size() << s; -} - -// Outputs the seq_id. This is base 36 encoded with an S prefix and _ suffix. -// As an added optimization, the "seq_id - 1" value is output with the first -// token as just "S". -// e.g. seq_id 0: "S_" -// seq_id 1: "S0_" -// seq_id 2: "S1_" -static void append_seq_id(int seq_id, std::stringstream* out) { - DCHECK_GE(seq_id, 0); - if (seq_id == 0) { - (*out) << "S_"; - return; - } - --seq_id; - char buffer[10]; - char* ptr = buffer + 10; - if (seq_id == 0) { - *--ptr = '0'; - } - while (seq_id != 0) { - DCHECK(ptr > buffer); - char c = static_cast(seq_id % 36); - *--ptr = (c < 10 ? '0' + c : 'A' + c - 10); - seq_id /= 36; - } - (*out) << "S"; - out->write(ptr, 10 - (ptr - buffer)); - (*out) << "_"; -} - -static void append_any_val_type(int namespace_id, const TypeDescriptor& type, - std::stringstream* s) { - (*s) << "N"; - // All the AnyVal types are in the doris_udf namespace, that token - // already came with doris_udf::FunctionContext - append_seq_id(namespace_id, s); - - switch (type.type) { - case TYPE_BOOLEAN: - append_mangled_token("BooleanVal", s); - break; - case TYPE_TINYINT: - append_mangled_token("TinyIntVal", s); - break; - case TYPE_SMALLINT: - append_mangled_token("SmallIntVal", s); - break; - case TYPE_INT: - append_mangled_token("IntVal", s); - break; - case TYPE_BIGINT: - append_mangled_token("BigIntVal", s); - break; - case TYPE_LARGEINT: - append_mangled_token("LargeIntVal", s); - break; - case TYPE_FLOAT: - append_mangled_token("FloatVal", s); - break; - case TYPE_TIME: - case TYPE_DOUBLE: - append_mangled_token("DoubleVal", s); - break; - case TYPE_VARCHAR: - case TYPE_CHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_STRING: - case TYPE_QUANTILE_STATE: - append_mangled_token("StringVal", s); - break; - case TYPE_DATE: - case TYPE_DATETIME: - append_mangled_token("DateTimeVal", s); - break; - case TYPE_DATEV2: - append_mangled_token("DateV2Val", s); - break; - case TYPE_DATETIMEV2: - append_mangled_token("DateTimeV2Val", s); - break; - case TYPE_DECIMALV2: - append_mangled_token("DecimalV2Val", s); - break; - case TYPE_DECIMAL32: - append_mangled_token("Decimal32Val", s); - break; - case TYPE_DECIMAL64: - append_mangled_token("Decimal64Val", s); - break; - case TYPE_DECIMAL128I: - append_mangled_token("Decimal128Val", s); - break; - default: - DCHECK(false) << "NYI: " << type.debug_string(); - } - (*s) << "E"; // end doris_udf namespace -} - -std::string SymbolsUtil::mangle_user_function(const std::string& fn_name, - const std::vector& arg_types, - bool has_var_args, TypeDescriptor* ret_arg_type) { - // We need to split fn_name by :: to separate scoping from tokens - const std::regex re("::"); - std::sregex_token_iterator it {fn_name.begin(), fn_name.end(), re, -1}; - std::vector name_tokens {it, {}}; - - // Mangled names use substitution as a builtin compression. The first time a token - // is seen, we output the raw token string and store the index ("seq_id"). The - // next time we see the same token, we output the index instead. - int seq_id = 0; - - // Sequence id for the doris_udf namespace token - int doris_udf_seq_id = -1; - - std::stringstream ss; - ss << MANGLE_PREFIX; - if (name_tokens.size() > 1) { - ss << "N"; // Start namespace - seq_id += name_tokens.size() - 1; // Append for all the name space tokens. - } - for (int i = 0; i < name_tokens.size(); ++i) { - append_mangled_token(name_tokens[i], &ss); - } - if (name_tokens.size() > 1) { - ss << "E"; // End fn namespace - } - ss << "PN"; // First argument and start of FunctionContext namespace - append_mangled_token("doris_udf", &ss); - doris_udf_seq_id = seq_id++; - append_mangled_token("FunctionContext", &ss); - ++seq_id; - ss << "E"; // E indicates end of namespace - - std::map argument_map; - for (int i = 0; i < arg_types.size(); ++i) { - int repeated_symbol_idx = -1; // Set to >0, if we've seen the symbol. - if (argument_map.find(arg_types[i].type) != argument_map.end()) { - repeated_symbol_idx = argument_map[arg_types[i].type]; - } - - if (has_var_args && i == arg_types.size() - 1) { - // We always specify varargs as int32 followed by the type. - ss << "i"; // The argument for the number of varargs. - ss << "P"; // This indicates what follows is a ptr (that is the array of varargs) - ++seq_id; // For "P" - if (repeated_symbol_idx > 0) { - append_seq_id(repeated_symbol_idx - 1, &ss); - continue; - } - } else { - if (repeated_symbol_idx > 0) { - append_seq_id(repeated_symbol_idx, &ss); - continue; - } - ss << "R"; // This indicates it is a reference type - ++seq_id; // For R. - } - - ss << "K"; // This indicates it is const - seq_id += 2; // For doris_udf::*Val, which is two tokens. - append_any_val_type(doris_udf_seq_id, arg_types[i], &ss); - argument_map[arg_types[i].type] = seq_id; - } - - // Output return argument. - if (ret_arg_type != nullptr) { - int repeated_symbol_idx = -1; - if (argument_map.find(ret_arg_type->type) != argument_map.end()) { - repeated_symbol_idx = argument_map[ret_arg_type->type]; - } - ss << "P"; // Return argument is a pointer - - if (repeated_symbol_idx != -1) { - // This is always last and a pointer type. - append_seq_id(argument_map[ret_arg_type->type] - 2, &ss); - } else { - append_any_val_type(doris_udf_seq_id, *ret_arg_type, &ss); - } - } - - return ss.str(); -} - -std::string SymbolsUtil::mangle_prepare_or_close_function(const std::string& fn_name) { - // We need to split fn_name by :: to separate scoping from tokens - const std::regex re("::"); - std::sregex_token_iterator it {fn_name.begin(), fn_name.end(), re, -1}; - std::vector name_tokens {it, {}}; - - // Mangled names use substitution as a builtin compression. The first time a token - // is seen, we output the raw token string and store the index ("seq_id"). The - // next time we see the same token, we output the index instead. - int seq_id = 0; - - std::stringstream ss; - ss << MANGLE_PREFIX; - if (name_tokens.size() > 1) { - ss << "N"; // Start namespace - seq_id += name_tokens.size() - 1; // Append for all the name space tokens. - } - for (int i = 0; i < name_tokens.size(); ++i) { - append_mangled_token(name_tokens[i], &ss); - } - if (name_tokens.size() > 1) { - ss << "E"; // End fn namespace - } - - ss << "PN"; // FunctionContext* argument and start of FunctionContext namespace - append_mangled_token("doris_udf", &ss); - append_mangled_token("FunctionContext", &ss); - ss << "E"; // E indicates end of namespace - - ss << "NS"; // FunctionStateScope argument - ss << seq_id; - ss << "_"; - append_mangled_token("FunctionStateScope", &ss); - ss << "E"; // E indicates end of namespace - - return ss.str(); -} -} // namespace doris diff --git a/be/src/util/symbols_util.h b/be/src/util/symbols_util.h deleted file mode 100644 index 1c4dc7b8b5..0000000000 --- a/be/src/util/symbols_util.h +++ /dev/null @@ -1,72 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/symbols-util.h -// and modified by Doris - -#pragma once - -#include -#include - -#include "runtime/types.h" - -namespace doris { - -/// Utility class to manipulate c++/IR symbols, mangling and demangling names. -class SymbolsUtil { -public: - /// Returns true if this symbol is mangled. - static bool is_mangled(const std::string& symbol); - - /// Returns the demangled string. The name is assumed to be a mangled string using the - /// gcc/llvm convention. - /// Returns the original input if it cannot be demangled. - static std::string demangle(const std::string& name); - - /// Returns the fully-qualified function name of 'symbol' (i.e. it strips the arguments - /// but retains any namespace and class names). 'symbol' may be mangled or unmangled. - /// Returns the original input if it cannot be demangled. - /// Example: "doris::foo(int arg1)" => "doris::foo" - static std::string demangle_no_args(const std::string& symbol); - - /// Returns the function name of 'symbol' (i.e., it strips the arguments and any - /// namespace/class qualifiers). 'symbol' may be mangled or unmangled. - /// Returns the original input if it cannot be demangled. - /// Example: "doris::foo(int arg1)" => "foo" - static std::string demangle_name_only(const std::string& symbol); - - /// Mangles fn_name with 'arg_types' to the function signature for user functions. - /// This maps types to AnyVal* and automatically adds the FunctionContext* - /// as the first argument. - /// The fn_name must be fully qualified. i.e namespace::class::fn. - /// if 'has_var_args' is true, the last argument in arg_types can be variable. - /// if 'ret_argument' is non-null, it is added as a last return argument. - /// TODO: this is not a general mangling function and that is more difficult to - /// do. Find a library to do this. - /// There is no place we require this to be perfect, if we can't do this right, - /// the user will need to specify the full mangled string. - static std::string mangle_user_function(const std::string& fn_name, - const std::vector& arg_types, - bool has_var_args, TypeDescriptor* ret_argument); - - /// Mangles fn_name assuming arguments - /// (doris_udf::FunctionContext*, doris_udf::FunctionContext::FunctionStateScope). - static std::string mangle_prepare_or_close_function(const std::string& fn_name); -}; - -} // namespace doris diff --git a/be/src/util/topn_counter.cpp b/be/src/util/topn_counter.cpp deleted file mode 100644 index 321612cf12..0000000000 --- a/be/src/util/topn_counter.cpp +++ /dev/null @@ -1,148 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "topn_counter.h" - -#include -#include - -#include - -#include "gen_cpp/olap_common.pb.h" -#include "slice.h" - -namespace doris { - -void TopNCounter::add_item(const std::string& item, uint64_t incrementCount) { - auto iter = _counter_map->find(item); - if (iter != _counter_map->end()) { - iter->second.add_count(incrementCount); - } else { - _counter_map->insert(std::make_pair(item, Counter(item, incrementCount))); - } - _ordered = false; -} - -void TopNCounter::serialize(std::string* buffer) { - sort_retain(_capacity); - PTopNCounter topn_counter; - topn_counter.set_top_num(_top_num); - topn_counter.set_space_expand_rate(_space_expand_rate); - for (std::vector::const_iterator it = _counter_vec->begin(); it != _counter_vec->end(); - ++it) { - PCounter* counter = topn_counter.add_counter(); - counter->set_item(it->get_item()); - counter->set_count(it->get_count()); - } - topn_counter.SerializeToString(buffer); -} - -bool TopNCounter::deserialize(const doris::Slice& src) { - PTopNCounter topn_counter; - if (!topn_counter.ParseFromArray(src.data, src.size)) { - LOG(WARNING) << "topn counter deserialize failed"; - return false; - } - - _space_expand_rate = topn_counter.space_expand_rate(); - set_top_num(topn_counter.top_num()); - for (int i = 0; i < topn_counter.counter_size(); ++i) { - const PCounter& counter = topn_counter.counter(i); - _counter_map->insert( - std::make_pair(counter.item(), Counter(counter.item(), counter.count()))); - _counter_vec->emplace_back(counter.item(), counter.count()); - } - _ordered = true; - return true; -} - -void TopNCounter::sort_retain(uint32_t capacity) { - _counter_vec->clear(); - sort_retain(capacity, _counter_vec); - _ordered = true; -} - -void TopNCounter::sort_retain(uint32_t capacity, std::vector* sort_vec) { - for (std::unordered_map::const_iterator it = _counter_map->begin(); - it != _counter_map->end(); ++it) { - sort_vec->emplace_back(it->second.get_item(), it->second.get_count()); - } - - std::sort(sort_vec->begin(), sort_vec->end(), TopNComparator()); - if (sort_vec->size() > capacity) { - for (uint32_t i = 0, n = sort_vec->size() - capacity; i < n; ++i) { - auto& counter = sort_vec->back(); - _counter_map->erase(counter.get_item()); - sort_vec->pop_back(); - } - } -} - -// Based on the parallel version of the Space Saving algorithm as described in: -// A parallel space saving algorithm for frequent items and the Hurwitz zeta distribution by Massimo Cafaro, et al. -void TopNCounter::merge(doris::TopNCounter&& other) { - if (other._counter_map->size() == 0) { - return; - } - - _space_expand_rate = other._space_expand_rate; - set_top_num(other._top_num); - bool this_full = _counter_map->size() >= _capacity; - bool another_full = other._counter_map->size() >= other._capacity; - - uint64_t m1 = this_full ? _counter_vec->back().get_count() : 0; - uint64_t m2 = another_full ? other._counter_vec->back().get_count() : 0; - - if (another_full == true) { - for (auto& entry : *(this->_counter_map)) { - entry.second.add_count(m2); - } - } - - for (auto& other_entry : *(other._counter_map)) { - auto itr = this->_counter_map->find(other_entry.first); - if (itr != _counter_map->end()) { - itr->second.add_count(other_entry.second.get_count() - m2); - } else { - this->_counter_map->insert(std::make_pair( - other_entry.first, - Counter(other_entry.first, other_entry.second.get_count() + m1))); - } - } - _ordered = false; - sort_retain(_capacity); -} - -void TopNCounter::finalize(std::string& finalize_str) { - if (!_ordered) { - sort_retain(_top_num); - } - // use json format print - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - uint32_t k = 0; - writer.StartObject(); - for (std::vector::const_iterator it = _counter_vec->begin(); - it != _counter_vec->end() && k < _top_num; ++it, ++k) { - writer.Key(it->get_item().data()); - writer.Uint64(it->get_count()); - } - writer.EndObject(); - finalize_str = buffer.GetString(); -} - -} // namespace doris diff --git a/be/src/util/topn_counter.h b/be/src/util/topn_counter.h deleted file mode 100644 index 51fabceed4..0000000000 --- a/be/src/util/topn_counter.h +++ /dev/null @@ -1,172 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "common/logging.h" -#include "runtime/datetime_value.h" -#include "runtime/decimalv2_value.h" -#include "runtime/large_int_value.h" -#include "udf/udf.h" - -namespace doris { - -static const uint32_t DEFAULT_SPACE_EXPAND_RATE = 50; - -struct Slice; - -class Counter { -public: - Counter() = default; - - Counter(const std::string& item, uint64_t count) : _item(item), _count(count) {} - - uint64_t get_count() const { return _count; } - - const std::string& get_item() const { return _item; } - - void add_count(uint64_t count) { _count += count; } - - bool operator==(const Counter& other) { - if (_item.compare(other._item) != 0) { - return false; - } - if (_count != other._count) { - return false; - } - return true; - } - -private: - std::string _item; - uint64_t _count; -}; - -// Refer to TopNCounter.java in https://github.com/apache/kylin -// Based on the Space-Saving algorithm and the Stream-Summary data structure as described in: -// Efficient Computation of Frequent and Top-k Elements in Data Streams by Metwally, Agrawal, and Abbadi -class TopNCounter { -public: - TopNCounter(uint32_t space_expand_rate = DEFAULT_SPACE_EXPAND_RATE) - : _top_num(0), - _space_expand_rate(space_expand_rate), - _capacity(0), - _ordered(false), - _counter_map(new std::unordered_map(_capacity)), - _counter_vec(new std::vector(_capacity)) {} - - TopNCounter(const Slice& src) - : _top_num(0), - _space_expand_rate(0), - _capacity(0), - _ordered(false), - _counter_map(new std::unordered_map(_capacity)), - _counter_vec(new std::vector(_capacity)) { - bool res = deserialize(src); - DCHECK(res); - } - - ~TopNCounter() { - delete _counter_map; - delete _counter_vec; - } - - template - void add_item(const T& item) { - add_item(item, 1); - } - - void add_item(const BooleanVal& item, uint64_t incrementCount) { - add_item_numeric(item, incrementCount); - } - void add_item(const TinyIntVal& item, uint64_t incrementCount) { - add_item_numeric(item, incrementCount); - } - void add_item(const SmallIntVal& item, uint64_t incrementCount) { - add_item_numeric(item, incrementCount); - } - void add_item(const IntVal& item, uint64_t incrementCount) { - add_item_numeric(item, incrementCount); - } - void add_item(const BigIntVal& item, uint64_t incrementCount) { - add_item_numeric(item, incrementCount); - } - void add_item(const FloatVal& item, uint64_t incrementCount) { - add_item_numeric(item, incrementCount); - } - void add_item(const DoubleVal& item, uint64_t incrementCount) { - add_item_numeric(item, incrementCount); - } - void add_item(const StringVal& item, uint64_t incrementCount) { - add_item(std::string((char*)item.ptr, item.len), incrementCount); - } - void add_item(const DateTimeVal& item, uint64_t incrementCount) { - char str[MAX_DTVALUE_STR_LEN]; - DateTimeValue::from_datetime_val(item).to_string(str); - add_item(std::string(str), incrementCount); - } - void add_item(const LargeIntVal& item, uint64_t incrementCount) { - add_item(LargeIntValue::to_string(item.val), incrementCount); - } - void add_item(const DecimalV2Val& item, uint64_t incrementCount) { - add_item(DecimalV2Value::from_decimal_val(item).to_string(), incrementCount); - } - - template - void add_item_numeric(const T& item, uint64_t incrementCount) { - add_item(std::to_string(item.val), incrementCount); - } - - void add_item(const std::string& item, uint64_t incrementCount); - - void serialize(std::string* buffer); - - bool deserialize(const Slice& src); - - void merge(doris::TopNCounter&& other); - - // Sort counter by count value and record it in _counter_vec - void sort_retain(uint32_t capacity); - - void sort_retain(uint32_t capacity, std::vector* sort_vec); - - void finalize(std::string&); - - void set_top_num(uint32_t top_num) { - _top_num = top_num; - _capacity = top_num * _space_expand_rate; - } - -private: - uint32_t _top_num; - uint32_t _space_expand_rate; - uint64_t _capacity; - bool _ordered; - std::unordered_map* _counter_map; - std::vector* _counter_vec; -}; - -class TopNComparator { -public: - bool operator()(const Counter& s1, const Counter& s2) { - return s1.get_count() > s2.get_count(); - } -}; -} // namespace doris diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index ca89e1715a..eabafd755e 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -819,113 +819,6 @@ inline bool Block::is_column_data_null(const doris::TypeDescriptor& type_desc, } } -// TODO: need to refactor this function, too long. -void Block::deep_copy_slot(void* dst, MemPool* pool, const doris::TypeDescriptor& type_desc, - const StringRef& data_ref, const IColumn* column, int row, - bool padding_char) { - if (type_desc.is_collection_type()) { - if (type_desc.type != TYPE_ARRAY) { - return; - } - - Field field; - column->get(row, field); - const auto& array = field.get(); - auto collection_value = reinterpret_cast(dst); - auto item_type_desc = type_desc.children.front(); - CollectionValue::init_collection(pool, array.size(), item_type_desc.type, collection_value); - - const ColumnArray* array_column = nullptr; - if (is_column_nullable(*column)) { - auto& nested_column = - reinterpret_cast(column)->get_nested_column(); - array_column = reinterpret_cast(&nested_column); - } else { - array_column = reinterpret_cast(column); - } - auto item_column = array_column->get_data_ptr().get(); - auto offset = array_column->get_offsets()[row - 1]; - auto iterator = collection_value->iterator(item_type_desc.type); - for (int i = 0; i < collection_value->length(); ++i) { - if (array[i].is_null()) { - const auto& null_value = doris_udf::AnyVal(true); - iterator.set(&null_value); - } else { - auto item_offset = offset + i; - const auto& data_ref = item_type_desc.type != TYPE_ARRAY - ? item_column->get_data_at(item_offset) - : StringRef {}; - if (item_type_desc.is_date_type()) { - // In CollectionValue, date type data is stored as either uint24_t or uint64_t. - DateTimeValue datetime_value; - deep_copy_slot(&datetime_value, pool, item_type_desc, data_ref, item_column, - item_offset, padding_char); - DateTimeVal datetime_val; - datetime_value.to_datetime_val(&datetime_val); - iterator.set(&datetime_val); - } else if (item_type_desc.is_decimal_v2_type()) { - // In CollectionValue, decimal type data is stored as decimal12_t. - DecimalV2Value decimal_value; - deep_copy_slot(&decimal_value, pool, item_type_desc, data_ref, item_column, - item_offset, padding_char); - DecimalV2Val decimal_val; - decimal_value.to_decimal_val(&decimal_val); - iterator.set(&decimal_val); - } else { - deep_copy_slot(iterator.get(), pool, item_type_desc, data_ref, item_column, - item_offset, padding_char); - } - } - iterator.next(); - } - } else if (type_desc.is_date_type()) { - VecDateTimeValue ts = - *reinterpret_cast(data_ref.data); - DateTimeValue dt; - ts.convert_vec_dt_to_dt(&dt); - memcpy(dst, &dt, sizeof(DateTimeValue)); - } else if (type_desc.type == TYPE_OBJECT) { - auto bitmap_value = (BitmapValue*)(data_ref.data); - auto size = bitmap_value->getSizeInBytes(); - - // serialize the content of string - // TODO: NEED TO REWRITE COMPLETELY. the way writing now is WRONG. - // StringRef shouldn't managing exclusive memory cause it will break RAII. - // besides, accessing object which is essentially const by non-const object - // is UB! - auto string_slot = reinterpret_cast(dst); - string_slot->data = reinterpret_cast(pool->allocate(size)); - bitmap_value->write(const_cast(string_slot->data)); //! - string_slot->size = size; - } else if (type_desc.type == TYPE_HLL) { - auto hll_value = (HyperLogLog*)(data_ref.data); - auto size = hll_value->max_serialized_size(); - auto string_slot = reinterpret_cast(dst); - string_slot->data = reinterpret_cast(pool->allocate(size)); - size_t actual_size = hll_value->serialize((uint8_t*)string_slot->data); - string_slot->size = actual_size; - } else if (type_desc.is_string_type()) { // TYPE_OBJECT and TYPE_HLL must be handled before. - memcpy(dst, (const void*)(&data_ref), sizeof(data_ref)); - // Copy the content of string - if (padding_char && type_desc.type == TYPE_CHAR) { - // serialize the content of string - auto string_slot = reinterpret_cast(dst); - string_slot->data = reinterpret_cast(pool->allocate(type_desc.len)); - string_slot->size = type_desc.len; - memset(const_cast(string_slot->data), 0, type_desc.len); //! - memcpy(const_cast(string_slot->data), data_ref.data, data_ref.size); //! - } else { - auto str_ptr = pool->allocate(data_ref.size); - memcpy(str_ptr, data_ref.data, data_ref.size); - auto string_slot = reinterpret_cast(dst); - string_slot->data = reinterpret_cast(str_ptr); - string_slot->size = data_ref.size; - } - } else { - memcpy(dst, data_ref.data, data_ref.size); - } -} - MutableBlock::MutableBlock(const std::vector& tuple_descs, int reserve_size, bool ignore_trivial_slot) { for (auto tuple_desc : tuple_descs) { diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index 86138a63f8..905551a2fc 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -42,7 +42,6 @@ namespace doris { class MemPool; class RowDescriptor; class Status; -class Tuple; class TupleDescriptor; struct TypeDescriptor; @@ -371,9 +370,6 @@ private: void erase_impl(size_t position); bool is_column_data_null(const doris::TypeDescriptor& type_desc, const StringRef& data_ref, const IColumn* column_with_type_and_name, int row); - void deep_copy_slot(void* dst, MemPool* pool, const doris::TypeDescriptor& type_desc, - const StringRef& data_ref, const IColumn* column, int row, - bool padding_char); }; using Blocks = std::vector; diff --git a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h index 0c83aae98f..708ed87050 100644 --- a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h +++ b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h @@ -25,7 +25,6 @@ namespace doris { class TextConverter; -class Tuple; class TupleDescriptor; class RuntimeState; class MemPool; diff --git a/be/src/vec/exec/vdata_gen_scan_node.h b/be/src/vec/exec/vdata_gen_scan_node.h index 18ca2c040f..7993fe1511 100644 --- a/be/src/vec/exec/vdata_gen_scan_node.h +++ b/be/src/vec/exec/vdata_gen_scan_node.h @@ -26,7 +26,6 @@ namespace doris { class TextConverter; -class Tuple; class TupleDescriptor; class RuntimeState; class MemPool; diff --git a/be/src/vec/exec/vmysql_scan_node.cpp b/be/src/vec/exec/vmysql_scan_node.cpp index 332fa0235b..cda26efd48 100644 --- a/be/src/vec/exec/vmysql_scan_node.cpp +++ b/be/src/vec/exec/vmysql_scan_node.cpp @@ -127,19 +127,6 @@ Status VMysqlScanNode::open(RuntimeState* state) { return Status::OK(); } -Status VMysqlScanNode::write_text_slot(char* value, int value_length, SlotDescriptor* slot, - RuntimeState* state) { - if (!_text_converter->write_slot(slot, _tuple, value, value_length, true, false, - _tuple_pool.get())) { - std::stringstream ss; - ss << "Fail to convert mysql value:'" << value << "' to " << slot->type() << " on column:`" - << slot->col_name() + "`"; - return Status::InternalError(ss.str()); - } - - return Status::OK(); -} - Status VMysqlScanNode::get_next(RuntimeState* state, vectorized::Block* block, bool* eos) { if (state == nullptr || block == nullptr || eos == nullptr) { return Status::InternalError("input is nullptr"); diff --git a/be/src/vec/exec/vmysql_scan_node.h b/be/src/vec/exec/vmysql_scan_node.h index 5bea0fb388..bd431f6b8a 100644 --- a/be/src/vec/exec/vmysql_scan_node.h +++ b/be/src/vec/exec/vmysql_scan_node.h @@ -58,10 +58,6 @@ private: vectorized::MutableColumnPtr* column_ptr, RuntimeState* state); // Write debug string of this into out. void debug_string(int indentation_level, std::stringstream* out) const override; - // Writes a slot in tuple from an MySQL value containing text data. - // The Mysql value is converted into the appropriate target type. - Status write_text_slot(char* value, int value_length, SlotDescriptor* slot, - RuntimeState* state); bool _is_init; MysqlScannerParam _my_param; @@ -86,8 +82,6 @@ private: std::unique_ptr _mysql_scanner; // Helper class for converting text to other types; std::unique_ptr _text_converter; - // Current tuple. - doris::Tuple* _tuple = nullptr; }; } // namespace vectorized } // namespace doris \ No newline at end of file diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 06af3c6c19..07cff73325 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -1949,6 +1949,45 @@ public: } }; +namespace MoneyFormat { + +template +static StringVal do_money_format(FunctionContext* context, const T int_value, + const int32_t frac_value = 0) { + char local[N]; + char* p = SimpleItoaWithCommas(int_value, local, sizeof(local)); + int32_t string_val_len = local + sizeof(local) - p + 3; + StringVal result = StringVal::create_temp_string_val(context, string_val_len); + memcpy(result.ptr, p, string_val_len - 3); + *(result.ptr + string_val_len - 3) = '.'; + *(result.ptr + string_val_len - 2) = '0' + (frac_value / 10); + *(result.ptr + string_val_len - 1) = '0' + (frac_value % 10); + return result; +}; + +// Note string value must be valid decimal string which contains two digits after the decimal point +static StringVal do_money_format(FunctionContext* context, const string& value) { + bool is_positive = (value[0] != '-'); + int32_t result_len = value.size() + (value.size() - (is_positive ? 4 : 5)) / 3; + StringVal result = StringVal::create_temp_string_val(context, result_len); + if (!is_positive) { + *result.ptr = '-'; + } + for (int i = value.size() - 4, j = result_len - 4; i >= 0; i = i - 3, j = j - 4) { + *(result.ptr + j) = *(value.data() + i); + if (i - 1 < 0) break; + *(result.ptr + j - 1) = *(value.data() + i - 1); + if (i - 2 < 0) break; + *(result.ptr + j - 2) = *(value.data() + i - 2); + if (j - 3 > 1 || (j - 3 == 1 && is_positive)) { + *(result.ptr + j - 3) = ','; + } + } + memcpy(result.ptr + result_len - 3, value.data() + value.size() - 3, 3); + return result; +}; + +} // namespace MoneyFormat struct MoneyFormatDoubleImpl { static DataTypes get_variadic_argument_types() { return {std::make_shared()}; } @@ -1958,7 +1997,7 @@ struct MoneyFormatDoubleImpl { for (size_t i = 0; i < input_rows_count; i++) { double value = MathFunctions::my_double_round(data_column->get_element(i), 2, false, false); - StringVal str = StringFunctions::do_money_format(context, fmt::format("{:.2f}", value)); + StringVal str = MoneyFormat::do_money_format(context, fmt::format("{:.2f}", value)); result_column->insert_data(reinterpret_cast(str.ptr), str.len); } } @@ -1972,7 +2011,7 @@ struct MoneyFormatInt64Impl { const auto* data_column = assert_cast*>(col_ptr.get()); for (size_t i = 0; i < input_rows_count; i++) { Int64 value = data_column->get_element(i); - StringVal str = StringFunctions::do_money_format(context, value); + StringVal str = MoneyFormat::do_money_format(context, value); result_column->insert_data(reinterpret_cast(str.ptr), str.len); } } @@ -1986,7 +2025,7 @@ struct MoneyFormatInt128Impl { const auto* data_column = assert_cast*>(col_ptr.get()); for (size_t i = 0; i < input_rows_count; i++) { Int128 value = data_column->get_element(i); - StringVal str = StringFunctions::do_money_format(context, value); + StringVal str = MoneyFormat::do_money_format(context, value); result_column->insert_data(reinterpret_cast(str.ptr), str.len); } } @@ -2006,7 +2045,7 @@ struct MoneyFormatDecimalImpl { DecimalV2Value rounded(0); DecimalV2Value::from_decimal_val(value).round(&rounded, 2, HALF_UP); - StringVal str = StringFunctions::do_money_format( + StringVal str = MoneyFormat::do_money_format( context, rounded.int_value(), abs(rounded.frac_value() / 10000000)); result_column->insert_data(reinterpret_cast(str.ptr), str.len); @@ -2025,7 +2064,7 @@ struct MoneyFormatDecimalImpl { frac_part = frac_part * multiplier; } - StringVal str = StringFunctions::do_money_format( + StringVal str = MoneyFormat::do_money_format( context, decimal32_column->get_whole_part(i), frac_part); result_column->insert_data(reinterpret_cast(str.ptr), str.len); @@ -2044,7 +2083,7 @@ struct MoneyFormatDecimalImpl { frac_part = frac_part * multiplier; } - StringVal str = StringFunctions::do_money_format( + StringVal str = MoneyFormat::do_money_format( context, decimal64_column->get_whole_part(i), frac_part); result_column->insert_data(reinterpret_cast(str.ptr), str.len); @@ -2063,7 +2102,7 @@ struct MoneyFormatDecimalImpl { frac_part = frac_part * multiplier; } - StringVal str = StringFunctions::do_money_format( + StringVal str = MoneyFormat::do_money_format( context, decimal128_column->get_whole_part(i), frac_part); result_column->insert_data(reinterpret_cast(str.ptr), str.len); diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt index 794eb47a98..e3f12f525b 100644 --- a/be/test/CMakeLists.txt +++ b/be/test/CMakeLists.txt @@ -52,8 +52,6 @@ set(EXPRS_TEST_FILES # exprs/hybrid_set_test.cpp # exprs/in-predicate-test.cpp exprs/json_function_test.cpp - exprs/string_functions_test.cpp - exprs/math_functions_test.cpp exprs/bloom_filter_predicate_test.cpp ) set(GEO_TEST_FILES @@ -166,7 +164,6 @@ set(RUNTIME_TEST_FILES runtime/memory/chunk_allocator_test.cpp runtime/memory/system_allocator_test.cpp runtime/cache/partition_cache_test.cpp - runtime/collection_value_test.cpp runtime/free_pool_test.cpp #runtime/array_test.cpp ) @@ -232,7 +229,6 @@ set(UTIL_TEST_FILES util/sort_heap_test.cpp util/counts_test.cpp util/date_func_test.cpp - util/array_parser_test.cpp util/quantile_state_test.cpp util/hdfs_storage_backend_test.cpp util/interval_tree_test.cpp diff --git a/be/test/exprs/json_function_test.cpp b/be/test/exprs/json_function_test.cpp index b371799836..8fe64e74e4 100644 --- a/be/test/exprs/json_function_test.cpp +++ b/be/test/exprs/json_function_test.cpp @@ -38,217 +38,6 @@ public: JsonFunctionTest() {} }; -TEST_F(JsonFunctionTest, string) { - std::string json_string("{\"id\":\"name\",\"age\":11,\"money\":123000.789}"); - std::string path_string("$.id"); - rapidjson::Document document1; - rapidjson::Value* res1 = JsonFunctions::get_json_object(nullptr, json_string, path_string, - JSON_FUN_STRING, &document1); - EXPECT_EQ(std::string(res1->GetString()), "name"); - - std::string json_string2("{\"price a\": [0,1,2],\"couponFee\":0}"); - std::string path_string2("$.price a"); - rapidjson::Document document2; - rapidjson::Value* res2 = JsonFunctions::get_json_object(nullptr, json_string2, path_string2, - JSON_FUN_STRING, &document2); - rapidjson::StringBuffer buf2; - rapidjson::Writer writer2(buf2); - res2->Accept(writer2); - EXPECT_EQ(std::string(buf2.GetString()), "[0,1,2]"); - - std::string json_string3("{\"price a\": [],\"couponFee\":0}"); - std::string path_string3("$.price a"); - rapidjson::Document document3; - rapidjson::Value* res3 = JsonFunctions::get_json_object(nullptr, json_string3, path_string3, - JSON_FUN_STRING, &document3); - rapidjson::StringBuffer buf3; - rapidjson::Writer writer3(buf3); - res3->Accept(writer3); - EXPECT_EQ(std::string(buf3.GetString()), "[]"); - - std::string json_string4("{\"price a\": [],\"couponFee\":null}"); - std::string path_string4("$.couponFee"); - rapidjson::Document document4; - rapidjson::Value* res4 = JsonFunctions::get_json_object(nullptr, json_string4, path_string4, - JSON_FUN_STRING, &document4); - EXPECT_TRUE(res4->IsNull()); - - std::string json_string5( - "{\"blockNames\": {}," - "\"seatCategories\": [{\"areas\": [{\"areaId\": 205705999,\"blockIds\": []}," - "{\"areaId\": 205705998,\"blockIds\": []}],\"seatCategoryId\": 338937290}]}"); - std::string path_string5_1("$.blockNames"); - rapidjson::Document document5_1; - rapidjson::Value* res5_1 = JsonFunctions::get_json_object(nullptr, json_string5, path_string5_1, - JSON_FUN_STRING, &document5_1); - rapidjson::StringBuffer buf5_1; - rapidjson::Writer writer5_1(buf5_1); - res5_1->Accept(writer5_1); - EXPECT_EQ(std::string(buf5_1.GetString()), "{}"); - - std::string path_string5_2("$.seatCategories.areas.blockIds"); - rapidjson::Document document5_2; - rapidjson::Value* res5_2 = JsonFunctions::get_json_object(nullptr, json_string5, path_string5_2, - JSON_FUN_STRING, &document5_2); - rapidjson::StringBuffer buf5_2; - rapidjson::Writer writer5_2(buf5_2); - res5_2->Accept(writer5_2); - EXPECT_EQ(std::string(buf5_2.GetString()), "[]"); - - std::string path_string5_3("$.seatCategories.areas[0].areaId"); - rapidjson::Document document5_3; - rapidjson::Value* res5_3 = JsonFunctions::get_json_object(nullptr, json_string5, path_string5_3, - JSON_FUN_STRING, &document5_2); - rapidjson::StringBuffer buf5_3; - rapidjson::Writer writer5_3(buf5_3); - res5_3->Accept(writer5_3); - EXPECT_EQ(std::string(buf5_3.GetString()), "205705999"); -} - -TEST_F(JsonFunctionTest, json_quote) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - - EXPECT_EQ(StringVal::null(), JsonFunctions::json_quote(context, StringVal::null())); - - doris_udf::StringVal res1 = JsonFunctions::json_quote(context, StringVal("null")); - EXPECT_EQ(std::string("\"null\""), std::string((char*)res1.ptr, res1.len)); - - doris_udf::StringVal res2 = JsonFunctions::json_quote(context, StringVal("[1, 2, 3]")); - EXPECT_EQ(std::string("\"[1, 2, 3]\""), std::string((char*)res2.ptr, res2.len)); - - doris_udf::StringVal res3 = JsonFunctions::json_quote(context, StringVal("\n\b\r\t")); - EXPECT_EQ(std::string("\"\\n\\b\\r\\t\""), std::string((char*)res3.ptr, res3.len)); - - doris_udf::StringVal res4 = JsonFunctions::json_quote(context, StringVal("\"")); - EXPECT_EQ(std::string("\"\\\"\""), std::string((char*)res4.ptr, res4.len)); - - doris_udf::StringVal json_str = {""}; - doris_udf::StringVal res5 = JsonFunctions::json_quote(context, json_str); - EXPECT_EQ(std::string("\"\""), std::string((char*)res5.ptr, res5.len)); - delete context; -} - -TEST_F(JsonFunctionTest, json_array) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - - doris_udf::StringVal json_str1[2] = {"[1,2,3]", "5"}; - doris_udf::StringVal res1 = JsonFunctions::json_array(context, 2, json_str1); - EXPECT_EQ(std::string("[\"[1,2,3]\"]"), std::string((char*)res1.ptr, res1.len)); - - doris_udf::StringVal json_str2[4] = {"1", "abc", "null", "250"}; - doris_udf::StringVal res2 = JsonFunctions::json_array(context, 4, json_str2); - EXPECT_EQ(std::string("[1,\"abc\",null]"), std::string((char*)res2.ptr, res2.len)); - - doris_udf::StringVal json_str3[1] = {""}; - doris_udf::StringVal res3 = JsonFunctions::json_array(context, 1, json_str3); - EXPECT_EQ(std::string("[]"), std::string((char*)res3.ptr, res3.len)); - - doris_udf::StringVal json_str4[2] = {"null", "0"}; - doris_udf::StringVal res4 = JsonFunctions::json_array(context, 2, json_str4); - EXPECT_EQ(std::string("[null]"), std::string((char*)res4.ptr, res4.len)); - delete context; -} - -TEST_F(JsonFunctionTest, json_object) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - doris_udf::StringVal json_str1[3] = {"id", "87", "52"}; - doris_udf::StringVal res1 = JsonFunctions::json_object(context, 3, json_str1); - EXPECT_EQ(std::string("{\"id\":87}"), std::string((char*)res1.ptr, res1.len)); - - doris_udf::StringVal json_str2[5] = {"name", "Jack", "score", "[87,98,90]", "5555"}; - doris_udf::StringVal res2 = JsonFunctions::json_object(context, 5, json_str2); - EXPECT_EQ(std::string("{\"name\":\"Jack\",\"score\":\"[87,98,90]\"}"), - std::string((char*)res2.ptr, res2.len)); - - doris_udf::StringVal json_str3[3] = {"key", "null", "50"}; - doris_udf::StringVal res3 = JsonFunctions::json_object(context, 3, json_str3); - EXPECT_EQ(std::string("{\"key\":null}"), std::string((char*)res3.ptr, res3.len)); - - doris_udf::StringVal json_str4[1] = {""}; - doris_udf::StringVal res4 = JsonFunctions::json_object(context, 1, json_str4); - EXPECT_EQ(std::string("{}"), std::string((char*)res4.ptr, res4.len)); - delete context; -} - -TEST_F(JsonFunctionTest, int) { - std::string json_string("{\"id\":\"name\",\"age\":11,\"money\":123000.789}"); - std::string path_string("$.age"); - rapidjson::Document document; - rapidjson::Value* res = JsonFunctions::get_json_object(nullptr, json_string, path_string, - JSON_FUN_INT, &document); - EXPECT_EQ(res->GetInt(), 11); - - std::string json_string1( - "{\"list\":[{\"id\":[{\"aa\":1}]},{\"id\":[{\"aa\":\"cc\"}]}," - "{\"id\":[{\"kk\":\"cc\"}]}]}"); - std::string path_string1("$.list.id.aa[0]"); - rapidjson::Document document1; - rapidjson::Value* res1 = JsonFunctions::get_json_object(nullptr, json_string1, path_string1, - JSON_FUN_INT, &document1); - EXPECT_EQ(res1->GetInt(), 1); - - std::string json_string2("[1,2,3,5,8,0]"); - std::string path_string2("$.[3]"); - rapidjson::Document document2; - rapidjson::Value* res2 = JsonFunctions::get_json_object(nullptr, json_string2, path_string2, - JSON_FUN_INT, &document2); - EXPECT_EQ(res2->GetInt(), 5); - - std::string json_string3("{\"price a\": [0,1,2],\"couponFee\":0.0}"); - std::string path_string3_1("$.price a[3]"); - rapidjson::Document document3_1; - rapidjson::Value* res3_1 = JsonFunctions::get_json_object(nullptr, json_string3, path_string3_1, - JSON_FUN_INT, &document3_1); - EXPECT_TRUE(res3_1 == nullptr); - - std::string path_string3_2("$.couponFee"); - rapidjson::Document document3_2; - rapidjson::Value* res3_2 = JsonFunctions::get_json_object(nullptr, json_string3, path_string3_2, - JSON_FUN_INT, &document3_2); - EXPECT_FALSE(res3_2->IsInt()); -} - -TEST_F(JsonFunctionTest, double) { - std::string json_string("{\"id\":\"name\",\"age\":11,\"money\":123000.789}"); - std::string path_string("$.money"); - rapidjson::Document document; - rapidjson::Value* res = JsonFunctions::get_json_object(nullptr, json_string, path_string, - JSON_FUN_DOUBLE, &document); - EXPECT_EQ(res->GetDouble(), 123000.789); - - std::string path_string2("$.age"); - rapidjson::Document document2; - rapidjson::Value* res2 = JsonFunctions::get_json_object(nullptr, json_string, path_string2, - JSON_FUN_DOUBLE, &document2); - EXPECT_EQ(res2->GetInt(), 11); -} - -TEST_F(JsonFunctionTest, special_char) { - std::string json_string("{\"key with.dot\": [\"v1\", \"v2\"]}"); - std::string path_string("$.\"key with.dot\"[1]"); - rapidjson::Document document; - rapidjson::Value* res = JsonFunctions::get_json_object(nullptr, json_string, path_string, - JSON_FUN_DOUBLE, &document); - EXPECT_FALSE(res->GetString() == nullptr); - EXPECT_EQ(std::string(res->GetString()), "v2"); - - std::string json_string2("{\"key with|\": [\"v1\", \"v2\"]}"); - std::string path_string2("$.key with|[0]"); - rapidjson::Document document2; - rapidjson::Value* res2 = JsonFunctions::get_json_object(nullptr, json_string2, path_string2, - JSON_FUN_DOUBLE, &document2); - EXPECT_FALSE(res2->GetString() == nullptr); - EXPECT_EQ(std::string(res2->GetString()), "v1"); - - std::string json_string3("{\"key with.dot\": [{\"key2.dot\":\"v1\"}, {\"key3.dot\":\"v2\"}]}"); - std::string path_string3("$.\"key with.dot\"[0].\"key2.dot\""); - rapidjson::Document document3; - rapidjson::Value* res3 = JsonFunctions::get_json_object(nullptr, json_string3, path_string3, - JSON_FUN_DOUBLE, &document3); - EXPECT_FALSE(res3->GetString() == nullptr); - EXPECT_EQ(std::string(res3->GetString()), "v1"); -} - TEST_F(JsonFunctionTest, json_path1) { bool wrap_explicitly; std::string json_raw_data( diff --git a/be/test/exprs/math_functions_test.cpp b/be/test/exprs/math_functions_test.cpp deleted file mode 100644 index 040ce49d34..0000000000 --- a/be/test/exprs/math_functions_test.cpp +++ /dev/null @@ -1,288 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/math_functions.h" - -#include - -#include -#include - -#include "runtime/large_int_value.h" -#include "runtime/mem_pool.h" -#include "testutil/function_utils.h" -#include "udf/udf_internal.h" - -namespace doris { - -class MathFunctionsTest : public testing::Test { -public: - MathFunctionsTest() = default; - - void SetUp() { - utils = new FunctionUtils(); - ctx = utils->get_fn_ctx(); - } - void TearDown() { delete utils; } - - FunctionUtils* utils; - FunctionContext* ctx; -}; - -TEST_F(MathFunctionsTest, abs) { - // FloatVal - FloatVal fv1(0.0f); - FloatVal fv2(0.1f); - FloatVal fv3(FLT_MAX); - FloatVal fv4(FLT_MIN); - EXPECT_EQ(fv1, MathFunctions::abs(ctx, FloatVal(0.0))); - EXPECT_EQ(fv1, MathFunctions::abs(ctx, FloatVal(-0.0))); - EXPECT_EQ(fv2, MathFunctions::abs(ctx, FloatVal(0.1))); - EXPECT_EQ(fv2, MathFunctions::abs(ctx, FloatVal(-0.1))); - EXPECT_EQ(fv3, MathFunctions::abs(ctx, FloatVal(FLT_MAX))); - EXPECT_EQ(fv3, MathFunctions::abs(ctx, FloatVal(-FLT_MAX))); - EXPECT_EQ(fv4, MathFunctions::abs(ctx, FloatVal(FLT_MIN))); - EXPECT_EQ(fv4, MathFunctions::abs(ctx, FloatVal(-FLT_MIN))); - - // DoubleVal - DoubleVal dv1(0.0); - DoubleVal dv2(0.1); - DoubleVal dv3(DBL_MAX); - DoubleVal dv4(DBL_MIN); - EXPECT_EQ(dv1, MathFunctions::abs(ctx, DoubleVal(0.0))); - EXPECT_EQ(dv1, MathFunctions::abs(ctx, DoubleVal(-0.0))); - EXPECT_EQ(dv2, MathFunctions::abs(ctx, DoubleVal(0.1))); - EXPECT_EQ(dv2, MathFunctions::abs(ctx, DoubleVal(-0.1))); - EXPECT_EQ(dv3, MathFunctions::abs(ctx, DoubleVal(DBL_MAX))); - EXPECT_EQ(dv3, MathFunctions::abs(ctx, DoubleVal(-DBL_MAX))); - EXPECT_EQ(dv4, MathFunctions::abs(ctx, DoubleVal(DBL_MIN))); - EXPECT_EQ(dv4, MathFunctions::abs(ctx, DoubleVal(-DBL_MIN))); - - // LargeIntVal - LargeIntVal liv1(0); - LargeIntVal liv2(1); - LargeIntVal liv3(MAX_INT128); - LargeIntVal liv4(__int128(INT64_MAX)); - LargeIntVal liv5(-__int128(INT64_MIN)); - - EXPECT_EQ(liv1, MathFunctions::abs(ctx, LargeIntVal(0))); - EXPECT_EQ(liv1, MathFunctions::abs(ctx, LargeIntVal(-0))); - EXPECT_EQ(liv2, MathFunctions::abs(ctx, LargeIntVal(1))); - EXPECT_EQ(liv2, MathFunctions::abs(ctx, LargeIntVal(-1))); - EXPECT_EQ(liv3, MathFunctions::abs(ctx, LargeIntVal(MAX_INT128))); - EXPECT_EQ(liv3, MathFunctions::abs(ctx, LargeIntVal(-MAX_INT128))); - EXPECT_EQ(liv3, MathFunctions::abs(ctx, LargeIntVal(MIN_INT128 + 1))); - // BigIntVal - EXPECT_EQ(liv1, MathFunctions::abs(ctx, BigIntVal(0))); - EXPECT_EQ(liv1, MathFunctions::abs(ctx, BigIntVal(-0))); - EXPECT_EQ(liv2, MathFunctions::abs(ctx, BigIntVal(1))); - EXPECT_EQ(liv2, MathFunctions::abs(ctx, BigIntVal(-1))); - EXPECT_EQ(liv4, MathFunctions::abs(ctx, BigIntVal(INT64_MAX))); - EXPECT_EQ(liv5, MathFunctions::abs(ctx, BigIntVal(INT64_MIN))); - - // IntVal - BigIntVal biv1(0); - BigIntVal biv2(1); - BigIntVal biv3(int64_t(INT32_MAX)); - BigIntVal biv4(-int64_t(INT32_MIN)); - - EXPECT_EQ(biv1, MathFunctions::abs(ctx, IntVal(0))); - EXPECT_EQ(biv1, MathFunctions::abs(ctx, IntVal(-0))); - EXPECT_EQ(biv2, MathFunctions::abs(ctx, IntVal(1))); - EXPECT_EQ(biv2, MathFunctions::abs(ctx, IntVal(-1))); - EXPECT_EQ(biv3, MathFunctions::abs(ctx, IntVal(INT32_MAX))); - EXPECT_EQ(biv4, MathFunctions::abs(ctx, IntVal(INT32_MIN))); - - // SmallIntVal - IntVal iv1(0); - IntVal iv2(1); - IntVal iv3(int32_t(INT16_MAX)); - IntVal iv4(-int32_t(INT16_MIN)); - EXPECT_EQ(iv1, MathFunctions::abs(ctx, SmallIntVal(0))); - EXPECT_EQ(iv1, MathFunctions::abs(ctx, SmallIntVal(-0))); - EXPECT_EQ(iv2, MathFunctions::abs(ctx, SmallIntVal(1))); - EXPECT_EQ(iv2, MathFunctions::abs(ctx, SmallIntVal(-1))); - EXPECT_EQ(iv3, MathFunctions::abs(ctx, SmallIntVal(INT16_MAX))); - EXPECT_EQ(iv4, MathFunctions::abs(ctx, SmallIntVal(INT16_MIN))); - - //TinyIntVal - SmallIntVal siv1(0); - SmallIntVal siv2(1); - SmallIntVal siv3(int16_t(INT8_MAX)); - SmallIntVal siv4(-int16_t(INT8_MIN)); - EXPECT_EQ(siv1, MathFunctions::abs(ctx, TinyIntVal(0))); - EXPECT_EQ(siv1, MathFunctions::abs(ctx, TinyIntVal(-0))); - EXPECT_EQ(siv2, MathFunctions::abs(ctx, TinyIntVal(1))); - EXPECT_EQ(siv2, MathFunctions::abs(ctx, TinyIntVal(-1))); - EXPECT_EQ(siv3, MathFunctions::abs(ctx, TinyIntVal(INT8_MAX))); - EXPECT_EQ(siv4, MathFunctions::abs(ctx, TinyIntVal(INT8_MIN))); -} - -TEST_F(MathFunctionsTest, rand) { - doris_udf::FunctionContext::TypeDesc type; - type.type = doris_udf::FunctionContext::TYPE_DOUBLE; - std::vector arg_types; - doris_udf::FunctionContext::TypeDesc type1; - type1.type = doris_udf::FunctionContext::TYPE_BIGINT; - arg_types.push_back(type1); - FunctionUtils* utils1 = new FunctionUtils(type, arg_types, 8); - FunctionContext* ctx1 = utils1->get_fn_ctx(); - std::vector constant_args; - BigIntVal bi(1); - constant_args.push_back(&bi); - ctx1->impl()->set_constant_args(constant_args); - - MathFunctions::rand_prepare(ctx1, FunctionContext::THREAD_LOCAL); - DoubleVal dv1 = MathFunctions::rand_seed(ctx1, BigIntVal(0)); - MathFunctions::rand_close(ctx1, FunctionContext::THREAD_LOCAL); - - MathFunctions::rand_prepare(ctx1, FunctionContext::THREAD_LOCAL); - DoubleVal dv2 = MathFunctions::rand_seed(ctx1, BigIntVal(0)); - MathFunctions::rand_close(ctx1, FunctionContext::THREAD_LOCAL); - - EXPECT_EQ(dv1.val, dv2.val); - delete utils1; - - MathFunctions::rand_prepare(ctx, FunctionContext::THREAD_LOCAL); - DoubleVal dv3 = MathFunctions::rand(ctx); - MathFunctions::rand_close(ctx, FunctionContext::THREAD_LOCAL); - - MathFunctions::rand_prepare(ctx, FunctionContext::THREAD_LOCAL); - DoubleVal dv4 = MathFunctions::rand(ctx); - MathFunctions::rand_close(ctx, FunctionContext::THREAD_LOCAL); - - EXPECT_NE(dv3.val, dv4.val); -} - -TEST_F(MathFunctionsTest, hex_int) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - - EXPECT_EQ(StringVal::null(), MathFunctions::hex_string(context, StringVal::null())); - - EXPECT_EQ( - StringVal("7FFFFFFFFFFFFFFF"), - MathFunctions::hex_int(context, BigIntVal(9223372036854775807))); //BigIntVal max_value - - EXPECT_EQ(StringVal("FFE5853AB393E6C0"), - MathFunctions::hex_int(context, BigIntVal(-7453337203775808))); - - EXPECT_EQ(StringVal("0"), MathFunctions::hex_int(context, BigIntVal(0))); - - EXPECT_EQ(StringVal("C"), MathFunctions::hex_int(context, BigIntVal(12))); - - EXPECT_EQ(StringVal("90"), MathFunctions::hex_int(context, BigIntVal(144))); - - EXPECT_EQ(StringVal("FFFFFFFFFFFFFFFF"), MathFunctions::hex_int(context, BigIntVal(-1))); - - EXPECT_EQ(StringVal("FFFFFFFFFFFFFFFE"), MathFunctions::hex_int(context, BigIntVal(-2))); - - EXPECT_EQ(StringVal("24EC1"), MathFunctions::hex_int(context, BigIntVal(151233))); - - delete context; -} - -TEST_F(MathFunctionsTest, hex_string) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - - EXPECT_EQ(StringVal::null(), MathFunctions::hex_string(context, StringVal::null())); - - EXPECT_EQ(StringVal("30"), MathFunctions::hex_string(context, StringVal("0"))); - - EXPECT_EQ(StringVal("31"), MathFunctions::hex_string(context, StringVal("1"))); - - EXPECT_EQ(StringVal("313233"), MathFunctions::hex_string(context, StringVal("123"))); - - EXPECT_EQ(StringVal("41"), MathFunctions::hex_string(context, StringVal("A"))); - - EXPECT_EQ(StringVal("61"), MathFunctions::hex_string(context, StringVal("a"))); - - EXPECT_EQ(StringVal("E68891"), MathFunctions::hex_string(context, StringVal("我"))); - - EXPECT_EQ(StringVal("3F"), MathFunctions::hex_string(context, StringVal("?"))); - - delete context; -} - -TEST_F(MathFunctionsTest, unhex) { - MemPool mem_pool; - doris_udf::FunctionContext* context = - doris_udf::FunctionContext::create_test_context(&mem_pool); - - EXPECT_EQ(StringVal::null(), MathFunctions::unhex(context, StringVal::null())); - - EXPECT_EQ(StringVal("123"), MathFunctions::unhex(context, StringVal("313233"))); - - EXPECT_EQ(StringVal(""), MathFunctions::unhex(context, StringVal("@!#"))); - - EXPECT_EQ(StringVal(""), MathFunctions::unhex(context, StringVal("@@"))); - - EXPECT_EQ(StringVal("a"), MathFunctions::unhex(context, StringVal("61"))); - - EXPECT_EQ(StringVal("123"), MathFunctions::unhex(context, StringVal("313233"))); - - EXPECT_EQ(StringVal(""), MathFunctions::unhex(context, StringVal("我"))); - - EXPECT_EQ(StringVal("?"), MathFunctions::unhex(context, StringVal("EFBC9F"))); - - delete context; -} - -TEST_F(MathFunctionsTest, round_bankers) { - BigIntVal r0(0); - BigIntVal r1(-4); - BigIntVal r2(4); - DoubleVal r3(3.6); - DoubleVal r4(10.4); - DoubleVal r5(10.76); - - EXPECT_EQ(r0, MathFunctions::round_bankers(ctx, DoubleVal(0.4))); - EXPECT_EQ(r1, MathFunctions::round_bankers(ctx, DoubleVal(-3.5))); - EXPECT_EQ(r2, MathFunctions::round_bankers(ctx, DoubleVal(4.5))); - EXPECT_EQ(r3, MathFunctions::round_bankers(ctx, DoubleVal(3.55), IntVal(1))); - EXPECT_EQ(r3, MathFunctions::round_bankers(ctx, DoubleVal(3.65), IntVal(1))); - EXPECT_EQ(r4, MathFunctions::round_bankers(ctx, DoubleVal(10.35), IntVal(1))); - EXPECT_EQ(r5, MathFunctions::round_bankers(ctx, DoubleVal(10.755), IntVal(2))); -} - -TEST_F(MathFunctionsTest, round_up_to) { - DoubleVal r0(0); - DoubleVal r1(1); - DoubleVal r2(3); - DoubleVal r3(4); - DoubleVal r4(3.5); - DoubleVal r5(3.55); - - DoubleVal r6(222500); - - EXPECT_EQ(r0, MathFunctions::round_up_to(ctx, DoubleVal(0), IntVal(0))); - EXPECT_EQ(r1, MathFunctions::round_up_to(ctx, DoubleVal(0.5), IntVal(0))); - EXPECT_EQ(r1, MathFunctions::round_up_to(ctx, DoubleVal(0.51), IntVal(0))); - // not 2 - EXPECT_EQ(r2, MathFunctions::round_up_to(ctx, DoubleVal(2.5), IntVal(0))); - EXPECT_EQ(r3, MathFunctions::round_up_to(ctx, DoubleVal(3.5), IntVal(0))); - - EXPECT_EQ(r4, MathFunctions::round_up_to(ctx, DoubleVal(3.5451), IntVal(1))); - EXPECT_EQ(r5, MathFunctions::round_up_to(ctx, DoubleVal(3.5451), IntVal(2))); - - // not 3.54 - EXPECT_EQ(r5, MathFunctions::round_up_to(ctx, DoubleVal(3.5450), IntVal(2))); - - // not 222400 - EXPECT_EQ(r6, MathFunctions::round_up_to(ctx, DoubleVal(222450.00), IntVal(-2))); -} - -} // namespace doris diff --git a/be/test/exprs/string_functions_test.cpp b/be/test/exprs/string_functions_test.cpp deleted file mode 100644 index 89c58ecfb3..0000000000 --- a/be/test/exprs/string_functions_test.cpp +++ /dev/null @@ -1,819 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/string_functions.h" - -#include -#include - -#include -#include - -#include "exprs/anyval_util.h" -#include "runtime/large_int_value.h" -#include "testutil/function_utils.h" -#include "testutil/test_util.h" -#include "util/simd/vstring_function.h" - -namespace doris { - -class StringFunctionsTest : public testing::Test { -public: - StringFunctionsTest() = default; - - void SetUp() { - utils = new FunctionUtils(); - ctx = utils->get_fn_ctx(); - } - void TearDown() { delete utils; } - -private: - FunctionUtils* utils; - FunctionContext* ctx; -}; - -TEST_F(StringFunctionsTest, do_money_format_for_bigint_bench) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - StringVal expected = AnyValUtil::from_string(ctx, std::string("9,223,372,036,854,775,807.00")); - BigIntVal bigIntVal(9223372036854775807); - for (int i = 0; i < LOOP_LESS_OR_MORE(10, 10000000); i++) { - StringVal result = StringFunctions::money_format(context, bigIntVal); - EXPECT_EQ(expected, result); - } - delete context; -} - -TEST_F(StringFunctionsTest, do_money_format_for_decimalv2_bench) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - StringVal expected = AnyValUtil::from_string(ctx, std::string("9,223,372,085.87")); - DecimalV2Value dv1(std::string("9223372085.8678")); - DecimalV2Val decimalV2Val; - dv1.to_decimal_val(&decimalV2Val); - for (int i = 0; i < LOOP_LESS_OR_MORE(10, 10000000); i++) { - StringVal result = StringFunctions::money_format(context, decimalV2Val); - EXPECT_EQ(expected, result); - } - delete context; -} - -TEST_F(StringFunctionsTest, money_format_bigint) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - - StringVal result = StringFunctions::money_format(context, doris_udf::BigIntVal(123456)); - StringVal expected = AnyValUtil::from_string(ctx, std::string("123,456.00")); - EXPECT_EQ(expected, result); - - result = StringFunctions::money_format(context, doris_udf::BigIntVal(-123456)); - expected = AnyValUtil::from_string(ctx, std::string("-123,456.00")); - EXPECT_EQ(expected, result); - - result = StringFunctions::money_format(context, doris_udf::BigIntVal(9223372036854775807)); - expected = AnyValUtil::from_string(ctx, std::string("9,223,372,036,854,775,807.00")); - EXPECT_EQ(expected, result); - delete context; -} - -TEST_F(StringFunctionsTest, money_format_large_int) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - __int128 value = MAX_INT128; - StringVal result = StringFunctions::money_format(context, doris_udf::LargeIntVal(value)); - StringVal expected = AnyValUtil::from_string_temp( - context, std::string("170,141,183,460,469,231,731,687,303,715,884,105,727.00")); - EXPECT_EQ(expected, result); - - value = MIN_INT128; - result = StringFunctions::money_format(context, doris_udf::LargeIntVal(value)); - expected = AnyValUtil::from_string_temp( - context, std::string("-170,141,183,460,469,231,731,687,303,715,884,105,728.00")); - EXPECT_EQ(expected, result); - delete context; -} - -TEST_F(StringFunctionsTest, money_format_double) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - - StringVal result = StringFunctions::money_format(context, doris_udf::DoubleVal(1234.456)); - StringVal expected = AnyValUtil::from_string(ctx, std::string("1,234.46")); - EXPECT_EQ(expected, result); - - result = StringFunctions::money_format(context, doris_udf::DoubleVal(1234.45)); - expected = AnyValUtil::from_string(ctx, std::string("1,234.45")); - EXPECT_EQ(expected, result); - - result = StringFunctions::money_format(context, doris_udf::DoubleVal(1234.4)); - expected = AnyValUtil::from_string(ctx, std::string("1,234.40")); - EXPECT_EQ(expected, result); - - result = StringFunctions::money_format(context, doris_udf::DoubleVal(1234.454)); - expected = AnyValUtil::from_string(ctx, std::string("1,234.45")); - EXPECT_EQ(expected, result); - - result = StringFunctions::money_format(context, doris_udf::DoubleVal(-36854775807.039)); - expected = AnyValUtil::from_string(ctx, std::string("-36,854,775,807.04")); - EXPECT_EQ(expected, result); - - delete context; -} - -TEST_F(StringFunctionsTest, money_format_decimal_v2) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - - DecimalV2Value dv1(std::string("3333333333.2222222222")); - DecimalV2Val value1; - dv1.to_decimal_val(&value1); - - StringVal result = StringFunctions::money_format(context, value1); - StringVal expected = AnyValUtil::from_string(ctx, std::string("3,333,333,333.22")); - EXPECT_EQ(expected, result); - - DecimalV2Value dv2(std::string("-740740740.71604938271975308642")); - DecimalV2Val value2; - dv2.to_decimal_val(&value2); - - result = StringFunctions::money_format(context, value2); - expected = AnyValUtil::from_string(ctx, std::string("-740,740,740.72")); - EXPECT_EQ(expected, result); - delete context; -} - -TEST_F(StringFunctionsTest, split_part) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("hello")), - StringFunctions::split_part(context, StringVal("hello word"), StringVal(" "), 1)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("word")), - StringFunctions::split_part(context, StringVal("hello word"), StringVal(" "), 2)); - - EXPECT_EQ(StringVal::null(), - StringFunctions::split_part(context, StringVal("hello word"), StringVal(" "), 3)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("")), - StringFunctions::split_part(context, StringVal("hello word"), StringVal("hello"), 1)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string(" word")), - StringFunctions::split_part(context, StringVal("hello word"), StringVal("hello"), 2)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("2019年9")), - StringFunctions::split_part(context, StringVal("2019年9月8日"), StringVal("月"), 1)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("")), - StringFunctions::split_part(context, StringVal("abcdabda"), StringVal("a"), 1)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("bcd")), - StringFunctions::split_part(context, StringVal("abcdabda"), StringVal("a"), 2)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("bd")), - StringFunctions::split_part(context, StringVal("abcdabda"), StringVal("a"), 3)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("")), - StringFunctions::split_part(context, StringVal("abcdabda"), StringVal("a"), 4)); - - EXPECT_EQ( - AnyValUtil::from_string(ctx, std::string("#123")), - StringFunctions::split_part(context, StringVal("abc###123###234"), StringVal("##"), 2)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("234")), - StringFunctions::split_part(context, StringVal("abc###123###234"), StringVal("##"), - -1)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("123#")), - StringFunctions::split_part(context, StringVal("abc###123###234"), StringVal("##"), - -2)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("abc#")), - StringFunctions::split_part(context, StringVal("abc###123###234"), StringVal("##"), - -3)); - - EXPECT_EQ(StringVal::null(), StringFunctions::split_part(context, StringVal("abc###123###234"), - StringVal("##"), -4)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("234")), - StringFunctions::split_part(context, StringVal("abc#123##234"), StringVal("#"), -1)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("")), - StringFunctions::split_part(context, StringVal("abc#123##234"), StringVal("#"), -2)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("123")), - StringFunctions::split_part(context, StringVal("abc#123##234"), StringVal("#"), -3)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("abc")), - StringFunctions::split_part(context, StringVal("abc#123##234"), StringVal("#"), -4)); - - EXPECT_EQ(StringVal::null(), - StringFunctions::split_part(context, StringVal("abc#123##234"), StringVal("#"), -5)); - - EXPECT_EQ(StringVal::null(), StringFunctions::split_part(context, StringVal("abc#123##234"), - StringVal("#"), IntVal::null())); - - EXPECT_EQ(StringVal::null(), StringFunctions::split_part(context, StringVal("abc#123##234"), - StringVal::null(), -1)); - - EXPECT_EQ( - AnyValUtil::from_string(ctx, std::string("")), - StringFunctions::split_part(context, StringVal("2019年9月-12月"), StringVal("月"), -1)); - EXPECT_EQ( - AnyValUtil::from_string(ctx, std::string("-12")), - StringFunctions::split_part(context, StringVal("2019年9月-12月"), StringVal("月"), -2)); - - EXPECT_EQ( - AnyValUtil::from_string(ctx, std::string("2019年9")), - StringFunctions::split_part(context, StringVal("2019年9月-12月"), StringVal("月"), -3)); - - EXPECT_EQ(StringVal::null(), StringFunctions::split_part(context, StringVal("2019年9月-12月"), - StringVal("月"), -4)); - delete context; -} - -TEST_F(StringFunctionsTest, ends_with) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - doris_udf::BooleanVal falseRet = doris_udf::BooleanVal(false); - doris_udf::BooleanVal trueRet = doris_udf::BooleanVal(true); - doris_udf::BooleanVal nullRet = doris_udf::BooleanVal::null(); - - EXPECT_EQ(trueRet, StringFunctions::ends_with(context, StringVal(""), StringVal(""))); - - EXPECT_EQ(trueRet, StringFunctions::ends_with(context, StringVal("hello"), StringVal(""))); - - EXPECT_EQ(falseRet, StringFunctions::ends_with(context, StringVal(""), StringVal("hello"))); - - EXPECT_EQ(trueRet, StringFunctions::ends_with(context, StringVal("hello"), StringVal("hello"))); - - EXPECT_EQ(trueRet, StringFunctions::ends_with(context, StringVal(" "), StringVal(" "))); - - EXPECT_EQ(falseRet, StringFunctions::ends_with(context, StringVal("hello"), StringVal(" "))); - - EXPECT_EQ(falseRet, StringFunctions::ends_with(context, StringVal(" "), StringVal("hello"))); - - EXPECT_EQ(falseRet, - StringFunctions::ends_with(context, StringVal("hello doris"), StringVal("hello"))); - - EXPECT_EQ(trueRet, - StringFunctions::ends_with(context, StringVal("hello doris"), StringVal("doris"))); - - EXPECT_EQ(trueRet, StringFunctions::ends_with(context, StringVal("hello doris"), - StringVal("hello doris"))); - - EXPECT_EQ(nullRet, StringFunctions::ends_with(context, StringVal("hello"), StringVal::null())); - - EXPECT_EQ(nullRet, StringFunctions::ends_with(context, StringVal::null(), StringVal("hello"))); - - EXPECT_EQ(nullRet, StringFunctions::ends_with(context, StringVal::null(), StringVal::null())); - delete context; -} - -TEST_F(StringFunctionsTest, starts_with) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - doris_udf::BooleanVal falseRet = doris_udf::BooleanVal(false); - doris_udf::BooleanVal trueRet = doris_udf::BooleanVal(true); - doris_udf::BooleanVal nullRet = doris_udf::BooleanVal::null(); - - EXPECT_EQ(trueRet, StringFunctions::starts_with(context, StringVal(""), StringVal(""))); - - EXPECT_EQ(trueRet, StringFunctions::starts_with(context, StringVal(" "), StringVal(" "))); - - EXPECT_EQ(trueRet, StringFunctions::starts_with(context, StringVal("hello"), StringVal(""))); - - EXPECT_EQ(falseRet, StringFunctions::starts_with(context, StringVal(""), StringVal("hello"))); - - EXPECT_EQ(trueRet, - StringFunctions::starts_with(context, StringVal("hello"), StringVal("hello"))); - - EXPECT_EQ(falseRet, StringFunctions::starts_with(context, StringVal("hello"), StringVal(" "))); - - EXPECT_EQ(falseRet, StringFunctions::starts_with(context, StringVal(" "), StringVal("world"))); - - EXPECT_EQ(trueRet, - StringFunctions::starts_with(context, StringVal("hello world"), StringVal("hello"))); - - EXPECT_EQ(falseRet, - StringFunctions::starts_with(context, StringVal("hello world"), StringVal("world"))); - - EXPECT_EQ(trueRet, StringFunctions::starts_with(context, StringVal("hello world"), - StringVal("hello world"))); - - EXPECT_EQ(nullRet, - StringFunctions::starts_with(context, StringVal("hello world"), StringVal::null())); - - EXPECT_EQ(nullRet, - StringFunctions::starts_with(context, StringVal::null(), StringVal("hello world"))); - - EXPECT_EQ(nullRet, StringFunctions::starts_with(context, StringVal::null(), StringVal::null())); - delete context; -} - -TEST_F(StringFunctionsTest, null_or_empty) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - doris_udf::BooleanVal falseRet = doris_udf::BooleanVal(false); - doris_udf::BooleanVal trueRet = doris_udf::BooleanVal(true); - - EXPECT_EQ(trueRet, StringFunctions::null_or_empty(context, StringVal(""))); - - EXPECT_EQ(falseRet, StringFunctions::null_or_empty(context, StringVal(" "))); - - EXPECT_EQ(falseRet, StringFunctions::null_or_empty(context, StringVal("hello"))); - - EXPECT_EQ(falseRet, StringFunctions::null_or_empty(context, StringVal("doris"))); - - EXPECT_EQ(falseRet, StringFunctions::null_or_empty(context, StringVal("111"))); - - EXPECT_EQ(falseRet, StringFunctions::null_or_empty(context, StringVal("."))); - - EXPECT_EQ(trueRet, StringFunctions::null_or_empty(context, StringVal::null())); - delete context; -} - -TEST_F(StringFunctionsTest, left) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("")), - StringFunctions::left(context, StringVal(""), 10)); - delete context; -} - -TEST_F(StringFunctionsTest, substring) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("")), - StringFunctions::substring(context, StringVal("hello word"), 0, 5)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("hello")), - StringFunctions::substring(context, StringVal("hello word"), 1, 5)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("word")), - StringFunctions::substring(context, StringVal("hello word"), 7, 4)); - - EXPECT_EQ(StringVal::null(), StringFunctions::substring(context, StringVal::null(), 1, 0)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("")), - StringFunctions::substring(context, StringVal("hello word"), 1, 0)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string(" word")), - StringFunctions::substring(context, StringVal("hello word"), -5, 5)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("hello word 你")), - StringFunctions::substring(context, StringVal("hello word 你好"), 1, 12)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("好")), - StringFunctions::substring(context, StringVal("hello word 你好"), 13, 1)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("")), - StringFunctions::substring(context, StringVal("hello word 你好"), 1, 0)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("rd 你好")), - StringFunctions::substring(context, StringVal("hello word 你好"), -5, 5)); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("h")), - StringFunctions::substring(context, StringVal("hello word 你好"), 1, 1)); - delete context; -} - -TEST_F(StringFunctionsTest, reverse) { - FunctionUtils fu; - doris_udf::FunctionContext* context = fu.get_fn_ctx(); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("olleh")), - StringFunctions::reverse(context, StringVal("hello"))); - EXPECT_EQ(StringVal::null(), StringFunctions::reverse(context, StringVal::null())); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("")), - StringFunctions::reverse(context, StringVal(""))); - - EXPECT_EQ(AnyValUtil::from_string(ctx, std::string("好你olleh")), - StringFunctions::reverse(context, StringVal("hello你好"))); -} - -TEST_F(StringFunctionsTest, length) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - - EXPECT_EQ(IntVal(5), StringFunctions::length(context, StringVal("hello"))); - EXPECT_EQ(IntVal(5), StringFunctions::char_utf8_length(context, StringVal("hello"))); - EXPECT_EQ(IntVal::null(), StringFunctions::length(context, StringVal::null())); - EXPECT_EQ(IntVal::null(), StringFunctions::char_utf8_length(context, StringVal::null())); - - EXPECT_EQ(IntVal(0), StringFunctions::length(context, StringVal(""))); - EXPECT_EQ(IntVal(0), StringFunctions::char_utf8_length(context, StringVal(""))); - - EXPECT_EQ(IntVal(11), StringFunctions::length(context, StringVal("hello你好"))); - - EXPECT_EQ(IntVal(7), StringFunctions::char_utf8_length(context, StringVal("hello你好"))); - delete context; -} - -TEST_F(StringFunctionsTest, append_trailing_char_if_absent) { - EXPECT_EQ(StringVal("ac"), - StringFunctions::append_trailing_char_if_absent(ctx, StringVal("a"), StringVal("c"))); - - EXPECT_EQ(StringVal("c"), - StringFunctions::append_trailing_char_if_absent(ctx, StringVal("c"), StringVal("c"))); - - EXPECT_EQ(StringVal("123c"), StringFunctions::append_trailing_char_if_absent( - ctx, StringVal("123c"), StringVal("c"))); - - EXPECT_EQ(StringVal("c"), - StringFunctions::append_trailing_char_if_absent(ctx, StringVal(""), StringVal("c"))); - - EXPECT_EQ(StringVal::null(), StringFunctions::append_trailing_char_if_absent( - ctx, StringVal::null(), StringVal("c"))); - - EXPECT_EQ(StringVal::null(), StringFunctions::append_trailing_char_if_absent( - ctx, StringVal("a"), StringVal::null())); - - EXPECT_EQ(StringVal::null(), StringFunctions::append_trailing_char_if_absent( - ctx, StringVal("a"), StringVal("abc"))); -} - -TEST_F(StringFunctionsTest, instr) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - EXPECT_EQ(IntVal(4), StringFunctions::instr(context, StringVal("foobarbar"), StringVal("bar"))); - EXPECT_EQ(IntVal(0), StringFunctions::instr(context, StringVal("foobar"), StringVal("xbar"))); - EXPECT_EQ(IntVal(2), StringFunctions::instr(context, StringVal("123456234"), StringVal("234"))); - EXPECT_EQ(IntVal(0), StringFunctions::instr(context, StringVal("123456"), StringVal("567"))); - EXPECT_EQ(IntVal(2), StringFunctions::instr(context, StringVal("1.234"), StringVal(".234"))); - EXPECT_EQ(IntVal(1), StringFunctions::instr(context, StringVal("1.234"), StringVal(""))); - EXPECT_EQ(IntVal(0), StringFunctions::instr(context, StringVal(""), StringVal("123"))); - EXPECT_EQ(IntVal(1), StringFunctions::instr(context, StringVal(""), StringVal(""))); - EXPECT_EQ(IntVal(3), StringFunctions::instr(context, StringVal("你好世界"), StringVal("世界"))); - EXPECT_EQ(IntVal(0), StringFunctions::instr(context, StringVal("你好世界"), StringVal("您好"))); - EXPECT_EQ(IntVal(3), StringFunctions::instr(context, StringVal("你好abc"), StringVal("a"))); - EXPECT_EQ(IntVal(3), StringFunctions::instr(context, StringVal("你好abc"), StringVal("abc"))); - EXPECT_EQ(IntVal::null(), StringFunctions::instr(context, StringVal::null(), StringVal("2"))); - EXPECT_EQ(IntVal::null(), StringFunctions::instr(context, StringVal(""), StringVal::null())); - EXPECT_EQ(IntVal::null(), - StringFunctions::instr(context, StringVal::null(), StringVal::null())); - delete context; -} - -TEST_F(StringFunctionsTest, locate) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - EXPECT_EQ(IntVal(4), - StringFunctions::locate(context, StringVal("bar"), StringVal("foobarbar"))); - EXPECT_EQ(IntVal(0), StringFunctions::locate(context, StringVal("xbar"), StringVal("foobar"))); - EXPECT_EQ(IntVal(2), - StringFunctions::locate(context, StringVal("234"), StringVal("123456234"))); - EXPECT_EQ(IntVal(0), StringFunctions::locate(context, StringVal("567"), StringVal("123456"))); - EXPECT_EQ(IntVal(2), StringFunctions::locate(context, StringVal(".234"), StringVal("1.234"))); - EXPECT_EQ(IntVal(1), StringFunctions::locate(context, StringVal(""), StringVal("1.234"))); - EXPECT_EQ(IntVal(0), StringFunctions::locate(context, StringVal("123"), StringVal(""))); - EXPECT_EQ(IntVal(1), StringFunctions::locate(context, StringVal(""), StringVal(""))); - EXPECT_EQ(IntVal(3), - StringFunctions::locate(context, StringVal("世界"), StringVal("你好世界"))); - EXPECT_EQ(IntVal(0), - StringFunctions::locate(context, StringVal("您好"), StringVal("你好世界"))); - EXPECT_EQ(IntVal(3), StringFunctions::locate(context, StringVal("a"), StringVal("你好abc"))); - EXPECT_EQ(IntVal(3), StringFunctions::locate(context, StringVal("abc"), StringVal("你好abc"))); - EXPECT_EQ(IntVal::null(), StringFunctions::locate(context, StringVal::null(), StringVal("2"))); - EXPECT_EQ(IntVal::null(), StringFunctions::locate(context, StringVal(""), StringVal::null())); - EXPECT_EQ(IntVal::null(), - StringFunctions::locate(context, StringVal::null(), StringVal::null())); - delete context; -} - -TEST_F(StringFunctionsTest, locate_pos) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - EXPECT_EQ(IntVal(7), StringFunctions::locate_pos(context, StringVal("bar"), - StringVal("foobarbar"), IntVal(5))); - EXPECT_EQ(IntVal(0), StringFunctions::locate_pos(context, StringVal("xbar"), - StringVal("foobar"), IntVal(1))); - EXPECT_EQ(IntVal(2), - StringFunctions::locate_pos(context, StringVal(""), StringVal("foobar"), IntVal(2))); - EXPECT_EQ(IntVal(0), - StringFunctions::locate_pos(context, StringVal("foobar"), StringVal(""), IntVal(1))); - EXPECT_EQ(IntVal(0), - StringFunctions::locate_pos(context, StringVal(""), StringVal(""), IntVal(2))); - EXPECT_EQ(IntVal(0), - StringFunctions::locate_pos(context, StringVal("A"), StringVal("AAAAAA"), IntVal(0))); - EXPECT_EQ(IntVal(0), StringFunctions::locate_pos(context, StringVal("A"), StringVal("大A写的A"), - IntVal(0))); - EXPECT_EQ(IntVal(2), StringFunctions::locate_pos(context, StringVal("A"), StringVal("大A写的A"), - IntVal(1))); - EXPECT_EQ(IntVal(2), StringFunctions::locate_pos(context, StringVal("A"), StringVal("大A写的A"), - IntVal(2))); - EXPECT_EQ(IntVal(5), StringFunctions::locate_pos(context, StringVal("A"), StringVal("大A写的A"), - IntVal(3))); - EXPECT_EQ(IntVal(7), StringFunctions::locate_pos(context, StringVal("BaR"), - StringVal("foobarBaR"), IntVal(5))); - EXPECT_EQ(IntVal::null(), - StringFunctions::locate_pos(context, StringVal::null(), StringVal("2"), IntVal(1))); - EXPECT_EQ(IntVal::null(), - StringFunctions::locate_pos(context, StringVal(""), StringVal::null(), IntVal(4))); - EXPECT_EQ(IntVal::null(), StringFunctions::locate_pos(context, StringVal::null(), - StringVal::null(), IntVal(4))); - EXPECT_EQ(IntVal::null(), StringFunctions::locate_pos(context, StringVal::null(), - StringVal::null(), IntVal(-1))); - delete context; -} - -TEST_F(StringFunctionsTest, lpad) { - EXPECT_EQ(StringVal("???hi"), - StringFunctions::lpad(ctx, StringVal("hi"), IntVal(5), StringVal("?"))); - EXPECT_EQ(StringVal("g8%7IgY%AHx7luNtf8Kh"), - StringFunctions::lpad(ctx, StringVal("g8%7IgY%AHx7luNtf8Kh"), IntVal(20), - StringVal(""))); - EXPECT_EQ(StringVal("h"), - StringFunctions::lpad(ctx, StringVal("hi"), IntVal(1), StringVal("?"))); - EXPECT_EQ(StringVal("你"), - StringFunctions::lpad(ctx, StringVal("你好"), IntVal(1), StringVal("?"))); - EXPECT_EQ(StringVal("你"), - StringFunctions::lpad(ctx, StringVal("你"), IntVal(1), StringVal("?"))); - EXPECT_EQ(StringVal(""), - StringFunctions::lpad(ctx, StringVal("hi"), IntVal(0), StringVal("?"))); - EXPECT_EQ(StringVal::null(), - StringFunctions::lpad(ctx, StringVal("hi"), IntVal(-1), StringVal("?"))); - EXPECT_EQ(StringVal("h"), - StringFunctions::lpad(ctx, StringVal("hi"), IntVal(1), StringVal(""))); - EXPECT_EQ(StringVal::null(), - StringFunctions::lpad(ctx, StringVal("hi"), IntVal(5), StringVal(""))); - EXPECT_EQ(StringVal("abahi"), - StringFunctions::lpad(ctx, StringVal("hi"), IntVal(5), StringVal("ab"))); - EXPECT_EQ(StringVal("ababhi"), - StringFunctions::lpad(ctx, StringVal("hi"), IntVal(6), StringVal("ab"))); - EXPECT_EQ(StringVal("呵呵呵hi"), - StringFunctions::lpad(ctx, StringVal("hi"), IntVal(5), StringVal("呵呵"))); - EXPECT_EQ(StringVal("hih呵呵"), - StringFunctions::lpad(ctx, StringVal("呵呵"), IntVal(5), StringVal("hi"))); -} - -TEST_F(StringFunctionsTest, rpad) { - EXPECT_EQ(StringVal("hi???"), - StringFunctions::rpad(ctx, StringVal("hi"), IntVal(5), StringVal("?"))); - EXPECT_EQ(StringVal("g8%7IgY%AHx7luNtf8Kh"), - StringFunctions::rpad(ctx, StringVal("g8%7IgY%AHx7luNtf8Kh"), IntVal(20), - StringVal(""))); - EXPECT_EQ(StringVal("h"), - StringFunctions::rpad(ctx, StringVal("hi"), IntVal(1), StringVal("?"))); - EXPECT_EQ(StringVal("你"), - StringFunctions::rpad(ctx, StringVal("你好"), IntVal(1), StringVal("?"))); - EXPECT_EQ(StringVal("你"), - StringFunctions::rpad(ctx, StringVal("你"), IntVal(1), StringVal("?"))); - EXPECT_EQ(StringVal(""), - StringFunctions::rpad(ctx, StringVal("hi"), IntVal(0), StringVal("?"))); - EXPECT_EQ(StringVal::null(), - StringFunctions::rpad(ctx, StringVal("hi"), IntVal(-1), StringVal("?"))); - EXPECT_EQ(StringVal("h"), - StringFunctions::rpad(ctx, StringVal("hi"), IntVal(1), StringVal(""))); - EXPECT_EQ(StringVal::null(), - StringFunctions::rpad(ctx, StringVal("hi"), IntVal(5), StringVal(""))); - EXPECT_EQ(StringVal("hiaba"), - StringFunctions::rpad(ctx, StringVal("hi"), IntVal(5), StringVal("ab"))); - EXPECT_EQ(StringVal("hiabab"), - StringFunctions::rpad(ctx, StringVal("hi"), IntVal(6), StringVal("ab"))); - EXPECT_EQ(StringVal("hi呵呵呵"), - StringFunctions::rpad(ctx, StringVal("hi"), IntVal(5), StringVal("呵呵"))); - EXPECT_EQ(StringVal("呵呵hih"), - StringFunctions::rpad(ctx, StringVal("呵呵"), IntVal(5), StringVal("hi"))); -} - -TEST_F(StringFunctionsTest, replace) { - //exist substring - EXPECT_EQ(StringVal("http://www.baidu.com:8080"), - StringFunctions::replace(ctx, StringVal("http://www.baidu.com:9090"), - StringVal("9090"), StringVal("8080"))); - - //not exist substring - EXPECT_EQ(StringVal("http://www.baidu.com:9090"), - StringFunctions::replace(ctx, StringVal("http://www.baidu.com:9090"), - StringVal("9070"), StringVal("8080"))); - - //old substring is empty - EXPECT_EQ(StringVal("http://www.baidu.com:9090"), - StringFunctions::replace(ctx, StringVal("http://www.baidu.com:9090"), StringVal(""), - StringVal("8080"))); - - //new substring is empty - EXPECT_EQ(StringVal("http://www.baidu.com:"), - StringFunctions::replace(ctx, StringVal("http://www.baidu.com:9090"), - StringVal("9090"), StringVal(""))); - - //origin string is null - EXPECT_EQ(StringVal::null(), StringFunctions::replace(ctx, StringVal::null(), - StringVal("hello"), StringVal("8080"))); - - //old substring is null - EXPECT_EQ(StringVal::null(), - StringFunctions::replace(ctx, StringVal("http://www.baidu.com:9090"), - StringVal::null(), StringVal("8080"))); - - //new substring is null - EXPECT_EQ(StringVal::null(), - StringFunctions::replace(ctx, StringVal("http://www.baidu.com:9090"), - StringVal("hello"), StringVal::null())); - - //substring contains Chinese character - EXPECT_EQ(StringVal("http://华夏zhongguo:9090"), - StringFunctions::replace(ctx, StringVal("http://中国hello:9090"), - StringVal("中国hello"), StringVal("华夏zhongguo"))); - - //old substring is at the beginning of string - EXPECT_EQ(StringVal("ftp://www.baidu.com:9090"), - StringFunctions::replace(ctx, StringVal("http://www.baidu.com:9090"), - StringVal("http"), StringVal("ftp"))); -} - -TEST_F(StringFunctionsTest, parse_url) { - EXPECT_EQ(StringVal("facebook.com"), - StringFunctions::parse_url(ctx, StringVal("http://facebook.com/path/p1.php?query=1"), - StringVal("AUTHORITY"))); - EXPECT_EQ(StringVal("facebook.com"), - StringFunctions::parse_url(ctx, StringVal("http://facebook.com/path/p1.php?query=1"), - StringVal("authority"))); - - EXPECT_EQ(StringVal("/a/b/c.php"), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090/a/b/c.php"), - StringVal("FILE"))); - EXPECT_EQ(StringVal("/a/b/c.php"), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090/a/b/c.php"), - StringVal("file"))); - - EXPECT_EQ(StringVal("/a/b/c.php"), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090/a/b/c.php"), - StringVal("PATH"))); - EXPECT_EQ(StringVal("/a/b/c.php"), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090/a/b/c.php"), - StringVal("path"))); - - EXPECT_EQ(StringVal("www.baidu.com"), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090"), - StringVal("HOST"))); - EXPECT_EQ(StringVal("www.baidu.com"), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090"), - StringVal("host"))); - - EXPECT_EQ(StringVal("http"), - StringFunctions::parse_url(ctx, StringVal("http://facebook.com/path/p1.php?query=1"), - StringVal("PROTOCOL"))); - EXPECT_EQ(StringVal("http"), - StringFunctions::parse_url(ctx, StringVal("http://facebook.com/path/p1.php?query=1"), - StringVal("protocol"))); - - EXPECT_EQ(StringVal("a=b"), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090?a=b"), - StringVal("QUERY"))); - EXPECT_EQ(StringVal("a=b"), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090?a=b"), - StringVal("query"))); - - EXPECT_EQ(StringVal::null(), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090?a=b"), - StringVal("REF"))); - EXPECT_EQ(StringVal::null(), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090?a=b"), - StringVal("ref"))); - - EXPECT_EQ(StringVal::null(), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090?a=b"), - StringVal("USERINFO"))); - EXPECT_EQ(StringVal::null(), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090?a=b"), - StringVal("userinfo"))); - - EXPECT_EQ(StringVal("9090"), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090?a=b"), - StringVal("PORT"))); - EXPECT_EQ(StringVal("9090"), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090/a/b/c?a=b"), - StringVal("PORT"))); - EXPECT_EQ(StringVal::null(), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com?a=b"), - StringVal("PORT"))); - EXPECT_EQ(StringVal("9090"), - StringFunctions::parse_url(ctx, StringVal("http://www.baidu.com:9090?a=b"), - StringVal("port"))); -} - -TEST_F(StringFunctionsTest, bit_length) { - doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); - - EXPECT_EQ(IntVal(40), StringFunctions::bit_length(context, StringVal("hello"))); - - EXPECT_EQ(IntVal::null(), StringFunctions::bit_length(context, StringVal::null())); - - EXPECT_EQ(IntVal(0), StringFunctions::bit_length(context, StringVal(""))); - - EXPECT_EQ(IntVal(88), StringFunctions::bit_length(context, StringVal("hello你好"))); - - delete context; -} - -TEST_F(StringFunctionsTest, lower) { - EXPECT_EQ(StringVal("hello"), StringFunctions::lower(ctx, StringVal("hello"))); - EXPECT_EQ(StringVal("hello"), StringFunctions::lower(ctx, StringVal("HELLO"))); - EXPECT_EQ(StringVal("hello123"), StringFunctions::lower(ctx, StringVal("HELLO123"))); - EXPECT_EQ(StringVal("hello, 123"), StringFunctions::lower(ctx, StringVal("HELLO, 123"))); - EXPECT_EQ(StringVal::null(), StringFunctions::lower(ctx, StringVal::null())); - EXPECT_EQ(StringVal(""), StringFunctions::lower(ctx, StringVal(""))); -} - -TEST_F(StringFunctionsTest, elt) { - StringVal str[] = {"hello", "world"}; - EXPECT_EQ(StringVal("hello"), StringFunctions::elt(ctx, 1, 2, str)); - EXPECT_EQ(StringVal("world"), StringFunctions::elt(ctx, 2, 2, str)); - EXPECT_EQ(StringVal::null(), StringFunctions::elt(ctx, 0, 2, str)); - EXPECT_EQ(StringVal::null(), StringFunctions::elt(ctx, 3, 2, str)); -} - -TEST_F(StringFunctionsTest, upper) { - // function test - EXPECT_EQ(StringVal("HELLO"), StringFunctions::upper(ctx, StringVal("HELLO"))); - EXPECT_EQ(StringVal("HELLO"), StringFunctions::upper(ctx, StringVal("hello"))); - EXPECT_EQ(StringVal("HELLO123"), StringFunctions::upper(ctx, StringVal("hello123"))); - EXPECT_EQ(StringVal("HELLO, 123"), StringFunctions::upper(ctx, StringVal("hello, 123"))); - EXPECT_EQ(StringVal::null(), StringFunctions::upper(ctx, StringVal::null())); - EXPECT_EQ(StringVal(""), StringFunctions::upper(ctx, StringVal(""))); -} - -TEST_F(StringFunctionsTest, ltrim) { - // no blank - StringVal src("hello worldaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - StringVal res = simd::VStringFunctions::ltrim(src); - EXPECT_EQ(src, res); - // empty string - StringVal src1(""); - res = simd::VStringFunctions::ltrim(src1); - EXPECT_EQ(src1, res); - // null string - StringVal src2(StringVal::null()); - res = simd::VStringFunctions::ltrim(src2); - EXPECT_EQ(src2, res); - // less than 16 blanks - StringVal src3(" hello worldaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - res = simd::VStringFunctions::ltrim(src3); - EXPECT_EQ(src, res); - // more than 16 blanks - StringVal src4(" hello worldaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - res = simd::VStringFunctions::ltrim(src4); - EXPECT_EQ(src, res); - // all are blanks, less than 16 blanks - StringVal src5(" "); - res = simd::VStringFunctions::ltrim(src5); - EXPECT_EQ(StringVal(""), res); - // all are blanks, more than 16 blanks - StringVal src6(" "); - res = simd::VStringFunctions::ltrim(src6); - EXPECT_EQ(StringVal(""), res); - // src less than 16 length - StringVal src7(" 12345678910"); - res = simd::VStringFunctions::ltrim(src7); - EXPECT_EQ(StringVal("12345678910"), res); -} - -TEST_F(StringFunctionsTest, rtrim) { - // no blank - StringVal src("hello worldaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); - StringVal res = simd::VStringFunctions::rtrim(src); - EXPECT_EQ(src, res); - // empty string - StringVal src1(""); - res = simd::VStringFunctions::rtrim(src1); - EXPECT_EQ(src1, res); - // null string - StringVal src2(StringVal::null()); - res = simd::VStringFunctions::rtrim(src2); - EXPECT_EQ(src2, res); - // less than 16 blanks - StringVal src3("hello worldaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa "); - res = simd::VStringFunctions::rtrim(src3); - EXPECT_EQ(src, res); - // more than 16 blanks - StringVal src4("hello worldaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa "); - res = simd::VStringFunctions::rtrim(src4); - EXPECT_EQ(src, res); - // all are blanks, less than 16 blanks - StringVal src5(" "); - res = simd::VStringFunctions::rtrim(src5); - EXPECT_EQ(StringVal(""), res); - // all are blanks, more than 16 blanks - StringVal src6(" "); - res = simd::VStringFunctions::rtrim(src6); - EXPECT_EQ(StringVal(""), res); - // src less than 16 length - StringVal src7("12345678910 "); - res = simd::VStringFunctions::rtrim(src7); - EXPECT_EQ(StringVal("12345678910"), res); -} - -TEST_F(StringFunctionsTest, is_ascii) { - EXPECT_EQ(true, simd::VStringFunctions::is_ascii(StringVal("hello123"))); - EXPECT_EQ(true, simd::VStringFunctions::is_ascii( - StringVal("hello123fwrewerwerwerwrsfqrwerwefwfwrwfsfwe"))); - EXPECT_EQ(false, simd::VStringFunctions::is_ascii(StringVal("运维组123"))); - EXPECT_EQ(false, simd::VStringFunctions::is_ascii( - StringVal("hello123运维组fwrewerwerwerwrsfqrwerwefwfwrwfsfwe"))); - EXPECT_EQ(true, simd::VStringFunctions::is_ascii(StringVal::null())); - EXPECT_EQ(true, simd::VStringFunctions::is_ascii(StringVal(""))); -} -} // namespace doris diff --git a/be/test/runtime/array_test.cpp b/be/test/runtime/array_test.cpp deleted file mode 100644 index 87f219ade7..0000000000 --- a/be/test/runtime/array_test.cpp +++ /dev/null @@ -1,873 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "gen_cpp/olap_file.pb.h" -#include "gen_cpp/segment_v2.pb.h" -#include "io/fs/file_system.h" -#include "io/fs/file_writer.h" -#include "io/fs/local_file_system.h" -#include "olap/field.h" -#include "olap/rowset/segment_v2/column_reader.h" -#include "olap/rowset/segment_v2/column_writer.h" -#include "olap/tablet_schema.h" -#include "olap/types.h" -#include "runtime/collection_value.h" -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" -#include "runtime/primitive_type.h" -#include "runtime/raw_value.h" -#include "testutil/array_utils.h" -#include "testutil/desc_tbl_builder.h" -#include "util/file_utils.h" -#include "util/uid_util.h" -#include "vec/columns/column.h" -#include "vec/columns/column_array.h" -#include "vec/core/block.h" -#include "vec/data_types/data_type_factory.hpp" - -namespace doris { - -template -ColumnPB create_column_pb(const std::string& type, const Ts&... sub_column_types) { - ColumnPB column; - auto prefix = "NOT_NULL_"; - column.set_is_nullable(type.compare(0, strlen(prefix), prefix) != 0); - column.set_type(column.is_nullable() ? type : type.substr(strlen(prefix))); - column.set_aggregation("NONE"); - if (type == "ARRAY") { - column.set_length(OLAP_ARRAY_MAX_BYTES); - } - if constexpr (sizeof...(sub_column_types) > 0) { - auto sub_column = create_column_pb(sub_column_types...); - column.add_children_columns()->Swap(&sub_column); - } - return column; -} - -TypeInfoPtr get_type_info(const ColumnPB& column_pb) { - TabletColumn tablet_column; - tablet_column.init_from_pb(column_pb); - return get_type_info(&tablet_column); -} - -std::unique_ptr create_field(const ColumnPB& column_pb) { - TabletColumn column; - column.init_from_pb(column_pb); - return std::unique_ptr(FieldFactory::create(column)); -} - -TypeDescriptor get_scalar_type_desc(const TypeInfo* type_info) { - switch (type_info->type()) { - case OLAP_FIELD_TYPE_BOOL: - return TypeDescriptor(TYPE_BOOLEAN); - case OLAP_FIELD_TYPE_TINYINT: - return TypeDescriptor(TYPE_TINYINT); - case OLAP_FIELD_TYPE_SMALLINT: - return TypeDescriptor(TYPE_SMALLINT); - case OLAP_FIELD_TYPE_INT: - return TypeDescriptor(TYPE_INT); - case OLAP_FIELD_TYPE_BIGINT: - return TypeDescriptor(TYPE_BIGINT); - case OLAP_FIELD_TYPE_LARGEINT: - return TypeDescriptor(TYPE_LARGEINT); - case OLAP_FIELD_TYPE_FLOAT: - return TypeDescriptor(TYPE_FLOAT); - case OLAP_FIELD_TYPE_DOUBLE: - return TypeDescriptor(TYPE_DOUBLE); - case OLAP_FIELD_TYPE_CHAR: - return TypeDescriptor::create_char_type(TypeDescriptor::MAX_CHAR_LENGTH); - case OLAP_FIELD_TYPE_VARCHAR: - return TypeDescriptor::create_varchar_type(TypeDescriptor::MAX_VARCHAR_LENGTH); - case OLAP_FIELD_TYPE_STRING: - return TypeDescriptor::create_string_type(); - case OLAP_FIELD_TYPE_DATE: - return TypeDescriptor(TYPE_DATE); - case OLAP_FIELD_TYPE_DATETIME: - return TypeDescriptor(TYPE_DATETIME); - case OLAP_FIELD_TYPE_DECIMAL: - return TypeDescriptor(TYPE_DECIMALV2); - default: - DCHECK(false) << "Failed to get the scalar type descriptor."; - } -} - -const TupleDescriptor* get_tuple_descriptor(ObjectPool& object_pool, const TypeInfo* type_info) { - DescriptorTblBuilder builder(&object_pool); - auto& tuple_desc_builder = builder.declare_tuple(); - if (type_info->type() == OLAP_FIELD_TYPE_ARRAY) { - TypeDescriptor type_desc(TYPE_ARRAY); - type_desc.len = OLAP_ARRAY_MAX_BYTES; - const auto* ptype = dynamic_cast(type_info)->item_type_info(); - while (ptype->type() == OLAP_FIELD_TYPE_ARRAY) { - type_desc.children.push_back(TypeDescriptor(TYPE_ARRAY)); - ptype = dynamic_cast(ptype)->item_type_info(); - } - type_desc.children.push_back(get_scalar_type_desc(ptype)); - tuple_desc_builder << type_desc; - } else { - tuple_desc_builder << get_scalar_type_desc(type_info); - } - return builder.build()->get_tuple_descriptor(0); -} - -CollectionValue* parse(MemPool& mem_pool, FunctionContext& context, const std::string& text, - const ColumnPB& column_pb) { - auto collection_value = - reinterpret_cast(mem_pool.allocate(sizeof(CollectionValue))); - auto status = ArrayUtils::create_collection_value(collection_value, &context, text); - if (!status.ok()) { - return nullptr; - } - return collection_value; -} - -void validate(const Field* field, const CollectionValue* expect, const CollectionValue* actual) { - EXPECT_TRUE(field->type_info()->equal(expect, actual)); -} - -class ArrayTest : public ::testing::Test { -public: - ArrayTest() : _mem_pool(new MemPool()) {} - - template - void test(const ColumnPB& column_pb, const std::vector& literal_arrays) { - auto field = create_field(column_pb); - const auto* type_info = field->type_info(); - const auto* tuple_desc = get_tuple_descriptor(_object_pool, type_info); - EXPECT_EQ(tuple_desc->slots().size(), 1); - - FunctionContext context; - ArrayUtils::prepare_context(context, *_mem_pool, column_pb); - - std::vector arrays; - for (const auto& literal_array : literal_arrays) { - arrays.push_back(parse(*_mem_pool, context, literal_array, column_pb)); - } - - for (auto array : arrays) { - test_array(column_pb, field.get(), tuple_desc, array); - } - test_direct_copy_array(field.get(), arrays); - test_write_and_read_column(column_pb, field.get(), arrays); - } - -protected: - void SetUp() override { - if (FileUtils::check_exist(TEST_DIR)) { - EXPECT_TRUE(FileUtils::remove_all(TEST_DIR).ok()); - } - EXPECT_TRUE(FileUtils::create_dir(TEST_DIR).ok()); - } - - void TearDown() override { - if (FileUtils::check_exist(TEST_DIR)) { - EXPECT_TRUE(FileUtils::remove_all(TEST_DIR).ok()); - } - } - -private: - void test_copy_array(const TupleDescriptor* tuple_desc, const Field* field, - const CollectionValue* array) { - auto slot_desc = tuple_desc->slots().front(); - const auto& item_type_desc = slot_desc->type().children[0]; - auto total_size = tuple_desc->byte_size() + array->get_byte_size(item_type_desc); - - auto src = allocate_tuple(total_size); - EXPECT_NE(src, nullptr); - - RawValue::write(array, src, slot_desc, _mem_pool.get()); - auto src_cv = reinterpret_cast(src->get_slot(slot_desc->tuple_offset())); - validate(field, array, src_cv); - - auto dst = allocate_tuple(total_size); - EXPECT_NE(dst, nullptr); - - src->deep_copy(dst, *tuple_desc, _mem_pool.get()); - auto dst_cv = reinterpret_cast(dst->get_slot(slot_desc->tuple_offset())); - validate(field, src_cv, dst_cv); - - dst->init(total_size); - int64_t offset = 0; - char* serialized_data = reinterpret_cast(dst); - src->deep_copy(*tuple_desc, &serialized_data, &offset, true); - EXPECT_EQ(total_size, offset); - EXPECT_EQ(total_size, serialized_data - reinterpret_cast(dst)); - dst_cv = reinterpret_cast(dst->get_slot(slot_desc->tuple_offset())); - CollectionValue::deserialize_collection(dst_cv, reinterpret_cast(dst), - item_type_desc); - validate(field, src_cv, dst_cv); - } - - Tuple* allocate_tuple(size_t size) { - auto tuple = reinterpret_cast(_mem_pool->allocate(size)); - if (tuple) { - tuple->init(size); - } - return tuple; - } - - void test_direct_copy_array(const Field* field, - const std::vector& arrays) { - CollectionValue cell; - std::unique_ptr variable_ptr(new char[field->length()]); - field->allocate_memory(reinterpret_cast(&cell), variable_ptr.get()); - EXPECT_EQ(cell.null_signs(), reinterpret_cast(variable_ptr.get())); - for (auto array : arrays) { - field->type_info()->direct_copy(&cell, array); - EXPECT_EQ(cell.null_signs(), reinterpret_cast(variable_ptr.get())); - validate(field, array, &cell); - } - } - - template - void test_write_and_read_column(const ColumnPB& column_pb, const Field* field, - const std::vector& arrays) { - auto filename = generate_uuid_string(); - const std::string path = TEST_DIR + "/" + filename; - LOG(INFO) << "Test path: " << path; - - segment_v2::ColumnMetaPB meta; - init_column_meta(&meta, column_pb); - - TabletColumn tablet_column; - tablet_column.init_from_pb(column_pb); - Schema schema({tablet_column}, 0); - { - auto file_writer = creat_file_writer(path); - EXPECT_NE(file_writer, nullptr); - auto writer = create_column_writer(file_writer.get(), - meta, column_pb); - EXPECT_NE(writer, nullptr); - Status st; - for (auto array : arrays) { - st = writer->append(false, const_cast(array)); - EXPECT_TRUE(st.ok()); - } - EXPECT_TRUE(writer->finish().ok()); - EXPECT_TRUE(writer->write_data().ok()); - EXPECT_TRUE(writer->write_ordinal_index().ok()); - EXPECT_TRUE(writer->write_zone_map().ok()); - - EXPECT_TRUE(file_writer->close().ok()); - } - { - auto type_info = get_type_info(column_pb); - auto tuple_desc = get_tuple_descriptor(_object_pool, type_info.get()); - - auto reader = create_column_reader(path, meta, arrays.size()); - EXPECT_NE(reader, nullptr); - auto rblock = create_readable_block(path); - EXPECT_NE(rblock, nullptr); - OlapReaderStatistics stats; - std::unique_ptr iter( - new_iterator(rblock.get(), &stats, reader.get())); - EXPECT_NE(iter, nullptr); - auto st = iter->seek_to_first(); - EXPECT_TRUE(st.ok()) << st.to_string(); - - auto data_type = - vectorized::DataTypeFactory::instance().create_data_type(tablet_column); - auto column_ptr = data_type->create_column(); - size_t rows_read = 1024; - column_ptr->reserve(rows_read); - do { - bool has_null = false; - st = iter->next_batch(&rows_read, column_ptr, &has_null); - EXPECT_TRUE(st.ok()); - vectorized::Block vblock; - vblock.insert({const_cast(*column_ptr).get_ptr(), - data_type, ""}); - for (int i = 0; i < arrays.size(); ++i) { - auto tuple = vblock.deep_copy_tuple(*tuple_desc, _mem_pool.get(), i, 0, false); - auto actual = - tuple->get_collection_slot(tuple_desc->slots().front()->tuple_offset()); - validate(field, arrays[i], actual); - } - } while (rows_read >= 1024); - } - } - - template - void init_column_meta(segment_v2::ColumnMetaPB* meta, const ColumnPB& column_pb) { - int column_id = 0; - TabletColumn column; - column.init_from_pb(column_pb); - init_column_meta(meta, &column_id, column); - } - - template - void init_column_meta(segment_v2::ColumnMetaPB* meta, int* column_id, - const TabletColumn& column) { - meta->set_column_id(*column_id); - meta->set_unique_id((*column_id)++); - meta->set_type(column.type()); - meta->set_length(column.length()); - if (column.type() == OLAP_FIELD_TYPE_ARRAY) { - meta->set_encoding(array_encoding); - } else { - meta->set_encoding(item_encoding); - } - meta->set_compression(segment_v2::LZ4F); - meta->set_is_nullable(column.is_nullable()); - for (uint32_t i = 0; i < column.get_subtype_count(); ++i) { - init_column_meta(meta->add_children_columns(), column_id, - column.get_sub_column(i)); - } - } - - io::FileWriterPtr creat_file_writer(const std::string& path) { - io::FileWriterPtr file_writer; - io::global_local_filesystem()->create_file(path, &file_writer); - return file_writer; - } - - template - std::unique_ptr create_column_writer(io::FileWriter* file_writer, - segment_v2::ColumnMetaPB& meta, - const ColumnPB& column_pb) { - segment_v2::ColumnWriterOptions writer_opts = {.meta = &meta}; - TabletColumn column; - column.init_from_pb(column_pb); - std::unique_ptr writer; - auto st = segment_v2::ColumnWriter::create(writer_opts, &column, file_writer, &writer); - if (!st.ok()) { - return nullptr; - } - st = writer->init(); - return st.ok() ? std::move(writer) : nullptr; - } - - std::unique_ptr create_column_reader( - const std::string& path, const segment_v2::ColumnMetaPB& meta, size_t num_rows) { - segment_v2::ColumnReaderOptions reader_opts; - std::unique_ptr reader; - auto st = segment_v2::ColumnReader::create(reader_opts, meta, num_rows, - io::global_local_filesystem(), path, &reader); - return st.ok() ? std::move(reader) : nullptr; - } - - io::FileReaderSPtr create_readable_block(const std::string& path) { - io::FileReaderSPtr reader; - auto st = io::global_local_filesystem()->open_file(path, &reader, nullptr); - return st.ok() ? std::move(reader) : nullptr; - } - - segment_v2::ColumnIterator* new_iterator(io::FileReader* rblock, OlapReaderStatistics* stats, - segment_v2::ColumnReader* reader) { - segment_v2::ColumnIterator* iter = nullptr; - auto st = reader->new_iterator(&iter); - if (!st.ok()) { - return nullptr; - } - segment_v2::ColumnIteratorOptions iter_opts; - iter_opts.stats = stats; - iter_opts.file_reader = rblock; - st = iter->init(iter_opts); - return st.ok() ? iter : nullptr; - } - - template - void test_array(const ColumnPB& column_pb, const Field* field, - const TupleDescriptor* tuple_desc, const CollectionValue* array) { - EXPECT_NE(array, nullptr); - test_copy_array(tuple_desc, field, array); - test_direct_copy_array(field, {array}); - test_write_and_read_column(column_pb, field, {array}); - } - -private: - static constexpr size_t MAX_MEMORY_BYTES = 1024 * 1024; - static const std::string TEST_DIR; - std::unique_ptr _mem_pool; - ObjectPool _object_pool; -}; - -const std::string ArrayTest::TEST_DIR = "./ut_dir/array_test"; - -TEST_F(ArrayTest, TestBoolean) { - // depth 1 - auto column_pb = create_column_pb("ARRAY", "BOOLEAN"); - std::vector literal_arrays = { - "[]", - "[null]", - "[true, false, false]", - "[true, null, false]", - "[false, null, null]", - "[null, null, true]", - "[null, null, null]", - }; - test(column_pb, literal_arrays); - - // depth 2 - column_pb = create_column_pb("ARRAY", "ARRAY", "BOOLEAN"); - literal_arrays = { - "[]", - "[[]]", - "[[false, true, false], [true, false, true]]", - "[[false, true, false], null, [true, false, true]]", - "[[false, true, null], null, [true, null, false], null, [null, false, false]]", - }; - test(column_pb, literal_arrays); - - // depth 3 - column_pb = create_column_pb("ARRAY", "ARRAY", "ARRAY", "BOOLEAN"); - literal_arrays = { - "[]", - "[[]]", - "[[[]]]", - "[[[null]], [[false], [true, false]], [[false, true, false], null, null]]", - }; - test(column_pb, literal_arrays); -} - -TEST_F(ArrayTest, TestNotNullBoolean) { - // depth 1 - auto column_pb = create_column_pb("ARRAY", "NOT_NULL_BOOLEAN"); - std::vector literal_arrays = { - "[]", - "[true, false, false]", - }; - test(column_pb, literal_arrays); - - // depth 2 - column_pb = create_column_pb("ARRAY", "ARRAY", "NOT_NULL_BOOLEAN"); - literal_arrays = { - "[]", - "[[]]", - "[[false, true, false]]", - "[[false, true, false], [true, false, true]]", - }; - test(column_pb, literal_arrays); - - // depth 3 - column_pb = create_column_pb("ARRAY", "ARRAY", "ARRAY", "NOT_NULL_BOOLEAN"); - literal_arrays = { - "[]", - "[[]]", - "[[[]]]", - "[[[]], [[false], [true, false]], [[false, true, false]]]", - }; - test(column_pb, literal_arrays); -} - -void test_integer(const std::string& type, ArrayTest& test_suite) { - // depth 1 - auto column_pb = create_column_pb("ARRAY", type); - std::vector literal_arrays = { - "[]", - "[null]", - "[1, 2, 3]", - "[1, null, 3]", - "[1, null, null]", - "[null, null, 3]", - "[null, null, null]", - }; - test_suite.test(column_pb, - literal_arrays); - - // depth 2 - column_pb = create_column_pb("ARRAY", "ARRAY", type); - literal_arrays = { - "[]", - "[[]]", - "[[1, 2, 3], [4, 5, 6]]", - "[[1, 2, 3], null, [4, 5, 6]]", - "[[1, 2, null], null, [4, null, 6], null, [null, 8, 9]]", - }; - test_suite.test(column_pb, - literal_arrays); - - // depth 3 - column_pb = create_column_pb("ARRAY", "ARRAY", "ARRAY", type); - literal_arrays = { - "[]", - "[[]]", - "[[[]]]", - "[[[null]], [[1], [2, 3]], [[4, 5, 6], null, null]]", - }; - test_suite.test(column_pb, - literal_arrays); -} - -TEST_F(ArrayTest, TestInteger) { - test_integer("TINYINT", *this); - test_integer("SMALLINT", *this); - test_integer("INT", *this); - test_integer("BIGINT", *this); - test_integer("LARGEINT", *this); -} - -void test_not_null_integer(const std::string& type, ArrayTest& test_suite) { - // depth 1 - auto column_pb = create_column_pb("ARRAY", type); - std::vector literal_arrays = { - "[]", - "[1, 2, 3]", - }; - test_suite.test(column_pb, - literal_arrays); - - // depth 2 - column_pb = create_column_pb("ARRAY", "ARRAY", type); - literal_arrays = { - "[]", - "[[]]", - "[[1, 2, 3]]", - "[[1, 2, 3], [4, 5, 6]]", - }; - test_suite.test(column_pb, - literal_arrays); - - // depth 3 - column_pb = create_column_pb("ARRAY", "ARRAY", "ARRAY", type); - literal_arrays = { - "[]", "[[]]", "[[[]]]", "[[[1, 2, 3]]]", "[[[]], [[1], [2, 3]], [[4, 5, 6]]]", - }; - test_suite.test(column_pb, - literal_arrays); -} - -TEST_F(ArrayTest, TestNotNullInteger) { - test_not_null_integer("NOT_NULL_TINYINT", *this); - test_not_null_integer("NOT_NULL_SMALLINT", *this); - test_not_null_integer("NOT_NULL_INT", *this); - test_not_null_integer("NOT_NULL_BIGINT", *this); - test_not_null_integer("NOT_NULL_LARGEINT", *this); -} - -void test_float(const std::string& type, ArrayTest& test_suite) { - // depth 1 - auto column_pb = create_column_pb("ARRAY", type); - std::vector literal_arrays = { - "[]", - "[null]", - "[1.5, 2.5, 3.5]", - "[1.5, null, 3.5]", - "[1.5, null, null]", - "[null, null, 3.5]", - "[null, null, null]", - }; - test_suite.test(column_pb, - literal_arrays); - // depth 2 - column_pb = create_column_pb("ARRAY", "ARRAY", type); - literal_arrays = { - "[]", - "[[]]", - "[[1.5, 2.5, 3.5], [4.5, 5.5, 6.5]]", - "[[1.5, 2.5, 3.5], null, [4.5, 5.5, 6.5]]", - "[[1.5, 2.5, null], null, [4.5, null, 6.5], null, [null, 8.5, 9.5]]", - }; - test_suite.test(column_pb, - literal_arrays); - - // depth 3 - column_pb = create_column_pb("ARRAY", "ARRAY", "ARRAY", type); - literal_arrays = { - "[]", - "[[]]", - "[[[]]]", - "[[[null]], [[1.5], [2.5, 3.5]], [[4.5, 5.5, 6.5], null, null]]", - }; - test_suite.test(column_pb, - literal_arrays); -} - -TEST_F(ArrayTest, TestFloat) { - test_float("FLOAT", *this); - test_float("DOUBLE", *this); -} - -void test_not_null_float(const std::string& type, ArrayTest& test_suite) { - // depth 1 - auto column_pb = create_column_pb("ARRAY", type); - std::vector literal_arrays = { - "[]", - "[1.5, 2.5, 3.5]", - }; - test_suite.test(column_pb, - literal_arrays); - // depth 2 - column_pb = create_column_pb("ARRAY", "ARRAY", type); - literal_arrays = { - "[]", - "[[]]", - "[[1.5, 2.5, 3.5]]", - "[[1.5, 2.5, 3.5], [4.5, 5.5, 6.5]]", - }; - test_suite.test(column_pb, - literal_arrays); - - // depth 3 - column_pb = create_column_pb("ARRAY", "ARRAY", "ARRAY", type); - literal_arrays = { - "[]", "[[]]", "[[[]]]", "[[[1.5]]]", "[[[]], [[1.5], [2.5, 3.5]], [[4.5, 5.5, 6.5]]]", - }; - test_suite.test(column_pb, - literal_arrays); -} - -TEST_F(ArrayTest, TestNotNullFloat) { - test_not_null_float("NOT_NULL_FLOAT", *this); - test_not_null_float("NOT_NULL_DOUBLE", *this); -} - -void test_string(const std::string& type, ArrayTest& test_suite) { - // depth 1 - auto column_pb = create_column_pb("ARRAY", type); - std::vector literal_arrays = { - "[]", - "[null]", - "[\"a\", \"b\", \"c\"]", - "[null, \"b\", \"c\"]", - "[\"a\", null, \"c\"]", - "[\"a\", \"b\", null]", - "[null, \"b\", null]", - "[null, null, null]", - }; - test_suite.test(column_pb, - literal_arrays); - - // more depths - column_pb = create_column_pb("ARRAY", "ARRAY", "ARRAY", type); - literal_arrays = { - "[]", - "[[]]", - "[[[]]]", - "[null, [null], [[null]]]", - "[[[\"a\", null, \"c\"], [\"d\", \"e\", \"f\"]], null, [[\"g\"]]]", - }; - test_suite.test(column_pb, - literal_arrays); -} - -TEST_F(ArrayTest, TestString) { - test_string("CHAR", *this); - test_string("VARCHAR", *this); - test_string("STRING", *this); -} - -void test_not_null_string(const std::string& type, ArrayTest& test_suite) { - // depth 1 - auto column_pb = create_column_pb("ARRAY", type); - std::vector literal_arrays = { - "[]", - "[\"a\", \"b\", \"c\"]", - }; - test_suite.test(column_pb, - literal_arrays); - - // more depths - column_pb = create_column_pb("ARRAY", "ARRAY", "ARRAY", type); - literal_arrays = { - "[]", - "[[]]", - "[[[]]]", - "[[[\"a\", \"b\", \"c\"]]]", - "[[[\"a\", \"c\"], [\"d\", \"e\", \"f\"]], [[\"g\"]]]", - }; - test_suite.test(column_pb, - literal_arrays); -} - -TEST_F(ArrayTest, TestNotNullString) { - test_not_null_string("NOT_NULL_CHAR", *this); - test_not_null_string("NOT_NULL_VARCHAR", *this); - test_not_null_string("NOT_NULL_STRING", *this); -} - -void test_datetime(const std::string& type, ArrayTest& test_suite) { - auto column_pb = create_column_pb("ARRAY", type); - std::vector literal_arrays; - if (type == "DATE") { - literal_arrays = { - "[]", - "[null]", - "[\"2022-04-01\", \"2022-04-02\", \"2022-04-03\"]", - "[\"2022-04-01\", null, \"2022-04-03\"]", - "[\"2022-04-01\", null, null]", - "[null, null, \"2022-04-03\"]", - "[null, null, null]", - }; - } else { - literal_arrays = { - "[]", - "[null]", - "[\"2022-04-01 19:30:40\", \"2022-04-02 19:30:40 \", \"2022-04-03 19:30:40\"]", - "[\"2022-04-01 19:30:40\", null, \"2022-04-03 19:30:40\"]", - "[\"2022-04-01 19:30:40\", null, null]", - "[null, null, \"2022-04-03 19:30:40\"]", - "[null, null, null]", - }; - } - test_suite.test(column_pb, - literal_arrays); - // depth 2 - column_pb = create_column_pb("ARRAY", "ARRAY", type); - if (type == "DATE") { - literal_arrays = { - "[]", - "[[]]", - "[[\"2022-04-01\", \"2022-04-02\", \"2022-04-03\"], [\"2022-04-04\", " - "\"2022-04-05\", " - "\"2022-04-06\"]]", - "[[\"2022-04-01\", \"2022-04-02\", \"2022-04-03\"], null, [\"2022-04-04\", " - "\"2022-04-05\", \"2022-04-06\"]]", - "[[\"2022-04-01\", \"2022-04-02\", null], null, [\"2022-04-04\", null, " - "\"2022-04-06\"], null, [null, \"2022-04-08\", \"2022-04-09\"]]", - }; - } else { - literal_arrays = { - "[]", - "[[]]", - "[[\"2022-04-01 19:30:40\", \"2022-04-02 19:30:40\", \"2022-04-03 19:30:40\"], " - "[\"2022-04-04 19:30:40\", " - "\"2022-04-05\", " - "\"2022-04-06\"]]", - "[[\"2022-04-01 19:30:40\", \"2022-04-02 19:30:40\", \"2022-04-03 19:30:40\"], " - "null, [\"2022-04-04 19:30:40\", " - "\"2022-04-05\", \"2022-04-06\"]]", - "[[\"2022-04-01 19:30:40\", \"2022-04-02 19:30:40\", null], null, [\"2022-04-04 " - "19:30:40\", null, " - "\"2022-04-06 19:30:40\"], null, [null, \"2022-04-08 19:30:40\", \"2022-04-09 " - "19:30:40\"]]", - }; - } - test_suite.test(column_pb, - literal_arrays); - - // depth 3 - column_pb = create_column_pb("ARRAY", "ARRAY", "ARRAY", type); - if (type == "DATE") { - literal_arrays = { - "[]", - "[[]]", - "[[[]]]", - "[[[null]], [[\"2022-04-01\"], [\"2022-04-02\", \"2022-04-03\"]], " - "[[\"2022-04-04\", " - "\"2022-04-05\", \"2022-04-06\"], null, null]]", - }; - } else { - literal_arrays = { - "[]", - "[[]]", - "[[[]]]", - "[[[null]], [[\"2022-04-01 19:30:40\"], [\"2022-04-02 19:30:40\", \"2022-04-03 " - "19:30:40\"]], " - "[[\"2022-04-04 19:30:40\", " - "\"2022-04-05 19:30:40\", \"2022-04-06 19:30:40\"], null, null]]", - }; - } - test_suite.test(column_pb, - literal_arrays); -} - -TEST_F(ArrayTest, TestDateTime) { - test_datetime("DATE", *this); - test_datetime("DATETIME", *this); -} - -void test_not_null_datetime(const std::string& type, ArrayTest& test_suite) { - auto column_pb = create_column_pb("ARRAY", type); - std::vector literal_arrays; - if (type == "DATE") { - literal_arrays = { - "[]", - "[\"2022-04-01\", \"2022-04-02\", \"2022-04-03\"]", - }; - } else { - literal_arrays = { - "[]", - "[\"2022-04-01 19:30:40\", \"2022-04-02 19:30:40 \", \"2022-04-03 19:30:40\"]", - }; - } - test_suite.test(column_pb, - literal_arrays); - // depth 2 - column_pb = create_column_pb("ARRAY", "ARRAY", type); - if (type == "DATE") { - literal_arrays = { - "[]", - "[[]]", - "[[\"2022-04-01\", \"2022-04-02\", \"2022-04-03\"], [\"2022-04-04\", " - "\"2022-04-05\", " - "\"2022-04-06\"]]", - }; - } else { - literal_arrays = { - "[]", - "[[]]", - "[[\"2022-04-01 19:30:40\", \"2022-04-02 19:30:40\", \"2022-04-03 19:30:40\"], " - "[\"2022-04-04 19:30:40\", " - "\"2022-04-05\", " - "\"2022-04-06\"]]", - }; - } - test_suite.test(column_pb, - literal_arrays); - - // depth 3 - column_pb = create_column_pb("ARRAY", "ARRAY", "ARRAY", type); - if (type == "DATE") { - literal_arrays = { - "[]", - "[[]]", - "[[[]]]", - "[[[\"2022-04-01\"]]]", - "[[[]], [[\"2022-04-01\"], [\"2022-04-02\", \"2022-04-03\"]], " - "[[\"2022-04-04\", " - "\"2022-04-05\", \"2022-04-06\"]]]", - }; - } else { - literal_arrays = { - "[]", - "[[]]", - "[[[]]]", - "[[[\"2022-04-01 19:30:40\"]]]", - "[[[]], [[\"2022-04-01 19:30:40\"], [\"2022-04-02 19:30:40\", \"2022-04-03 " - "19:30:40\"]], " - "[[\"2022-04-04 19:30:40\", " - "\"2022-04-05 19:30:40\", \"2022-04-06 19:30:40\"]]]", - }; - } - test_suite.test(column_pb, - literal_arrays); -} - -TEST_F(ArrayTest, TestNotNullDateTime) { - test_not_null_datetime("NOT_NULL_DATE", *this); - test_not_null_datetime("NOT_NULL_DATETIME", *this); -} - -TEST_F(ArrayTest, TestDecimal) { - test_integer("DECIMAL", *this); - test_not_null_integer("NOT_NULL_DECIMAL", *this); - test_float("DECIMAL", *this); - test_not_null_float("NOT_NULL_DECIMAL", *this); -} - -} // namespace doris diff --git a/be/test/runtime/collection_value_test.cpp b/be/test/runtime/collection_value_test.cpp deleted file mode 100644 index ce2fc7adec..0000000000 --- a/be/test/runtime/collection_value_test.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "runtime/collection_value.h" - -#include - -#include "common/object_pool.h" -#include "string" -#include "util/bitmap.h" - -#define private public - -namespace doris { - -TEST(CollectionValueTest, init) { - { - CollectionValue cv; - - ObjectPool pool; - EXPECT_TRUE(CollectionValue::init_collection(&pool, 10, TYPE_INT, &cv).ok()); - - EXPECT_EQ(10, cv.size()); - - for (int j = 0; j < 10; ++j) { - EXPECT_FALSE(*(cv._null_signs + j)); - } - - EXPECT_FALSE(CollectionValue::init_collection(&pool, 10, TYPE_INT, nullptr).ok()); - - CollectionValue cv_null; - bzero(&cv_null, sizeof(cv_null)); - EXPECT_TRUE(CollectionValue::init_collection(&pool, 0, TYPE_INT, &cv_null).ok()); - EXPECT_EQ(0, cv_null.size()); - } - - { - CollectionValue cv; - ObjectPool pool; - EXPECT_TRUE(CollectionValue::init_collection(&pool, 10, TYPE_INT, &cv).ok()); - } -} - -TEST(CollectionValueTest, set) { - CollectionValue cv; - ObjectPool pool; - EXPECT_TRUE(CollectionValue::init_collection(&pool, 10, TYPE_INT, &cv).ok()); - - // normal - { - auto iterator = cv.iterator(TYPE_INT); - IntVal v0 = IntVal::null(); - iterator.set(&v0); - for (int j = 1; j < cv.size(); ++j) { - IntVal i(j + 10); - iterator.seek(j); - iterator.set(&i); - } - } - - { - auto iter = cv.iterator(TYPE_INT); - IntVal v0; - iter.get(&v0); - EXPECT_TRUE(v0.is_null); - EXPECT_TRUE(iter.is_null()); - iter.next(); - for (int k = 1; k < cv.size(); ++k, iter.next()) { - IntVal v; - iter.get(&v); - EXPECT_EQ(k + 10, v.val); - } - } - - // over size - { - IntVal intv(20); - auto iterator = cv.iterator(TYPE_INT); - EXPECT_FALSE(iterator.seek(10)); - } -} -} // namespace doris diff --git a/be/test/testutil/array_utils.cpp b/be/test/testutil/array_utils.cpp index 834f8c6157..8fcb7061b5 100644 --- a/be/test/testutil/array_utils.cpp +++ b/be/test/testutil/array_utils.cpp @@ -25,7 +25,6 @@ #include "runtime/mem_pool.h" #include "runtime/memory/mem_tracker.h" #include "udf/udf_internal.h" -#include "util/array_parser.h" namespace doris { @@ -38,19 +37,6 @@ void ArrayUtils::prepare_context(FunctionContext& context, MemPool& mem_pool, context.impl()->_pool = new FreePool(&mem_pool); } -Status ArrayUtils::create_collection_value(CollectionValue* collection_value, - FunctionContext* context, - const std::string& json_string) { - CollectionVal collection_val; - auto status = ArrayParser::parse(collection_val, context, StringVal(json_string.c_str())); - if (!status.ok()) { - return status; - } - new (collection_value) CollectionValue(collection_val.data, collection_val.length, - collection_val.has_null, collection_val.null_signs); - return Status::OK(); -} - TypeDesc ArrayUtils::create_function_type_desc(const ColumnPB& column_pb) { TypeDesc type_desc; type_desc.len = column_pb.length(); diff --git a/be/test/testutil/array_utils.h b/be/test/testutil/array_utils.h index 85cc0434d5..f6e79a0804 100644 --- a/be/test/testutil/array_utils.h +++ b/be/test/testutil/array_utils.h @@ -33,8 +33,6 @@ public: using TypeDesc = FunctionContext::TypeDesc; static void prepare_context(FunctionContext& context, MemPool& mem_pool, const ColumnPB& column_pb); - static Status create_collection_value(CollectionValue* collection_value, - FunctionContext* context, const std::string& json_string); private: static TypeDesc create_function_type_desc(const ColumnPB& column_pb); diff --git a/be/test/util/array_parser_test.cpp b/be/test/util/array_parser_test.cpp deleted file mode 100644 index 1b0f93823b..0000000000 --- a/be/test/util/array_parser_test.cpp +++ /dev/null @@ -1,189 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include -#include - -#include "olap/tablet_schema.h" -#include "olap/types.h" -#include "testutil/array_utils.h" -#include "vec/common/string_ref.h" - -namespace doris { - -template -ColumnPB create_column_pb(const std::string& type, const Ts&... sub_column_types) { - ColumnPB column; - column.set_type(type); - column.set_aggregation("NONE"); - column.set_is_nullable(true); - if (type == "ARRAY") { - column.set_length(OLAP_ARRAY_MAX_BYTES); - } - if constexpr (sizeof...(sub_column_types) > 0) { - auto sub_column = create_column_pb(sub_column_types...); - column.add_children_columns()->Swap(&sub_column); - } - return column; -} - -static TypeInfoPtr get_type_info(const ColumnPB& column_pb) { - TabletColumn tablet_column; - tablet_column.init_from_pb(column_pb); - return get_type_info(&tablet_column); -} - -static void test_array_parser(const ColumnPB& column_pb, const std::string& json, - const CollectionValue& expect) { - MemPool mem_pool; - FunctionContext context; - ArrayUtils::prepare_context(context, mem_pool, column_pb); - CollectionValue actual; - auto status = ArrayUtils::create_collection_value(&actual, &context, json); - EXPECT_TRUE(status.ok()); - EXPECT_TRUE(get_type_info(column_pb)->equal(&expect, &actual)); -} - -TEST(ArrayParserTest, TestParseIntArray) { - auto column_pb = create_column_pb("ARRAY", "INT"); - test_array_parser(column_pb, "[]", CollectionValue(0)); - - int32_t data[] = {1, 2, 3}; - int num_items = sizeof(data) / sizeof(data[0]); - CollectionValue value(data, num_items, false, nullptr); - test_array_parser(column_pb, "[1, 2, 3]", value); - - bool null_signs[] = {false, true, false}; - value.set_has_null(true); - value.set_null_signs(null_signs); - test_array_parser(column_pb, "[1, null, 3]", value); -} - -TEST(ArrayParserTest, TestParseVarcharArray) { - auto column_pb = create_column_pb("ARRAY", "VARCHAR"); - test_array_parser(column_pb, "[]", CollectionValue(0)); - - char data[] = {'a', 'b', 'c'}; - int num_items = sizeof(data) / sizeof(data[0]); - StringRef string_values[] = { - {&data[0], 1}, - {&data[1], 1}, - {&data[2], 1}, - }; - CollectionValue value(string_values, num_items, false, nullptr); - test_array_parser(column_pb, "[\"a\", \"b\", \"c\"]", value); - - bool null_signs[] = {false, true, false}; - value.set_has_null(true); - value.set_null_signs(null_signs); - test_array_parser(column_pb, "[\"a\", null, \"c\"]", value); -} - -TEST(ArrayParserTest, TestNestedArray) { - auto column_pb = create_column_pb("ARRAY", "ARRAY", "INT"); - test_array_parser(column_pb, "[]", CollectionValue(0)); - - CollectionValue empty_array(0); - test_array_parser(column_pb, "[[]]", {&empty_array, 1, false, nullptr}); - - int data[] = {1, 0, 3}; - uint32_t num_items = sizeof(data) / sizeof(data[0]); - bool null_signs[] = {false, true, false}; - CollectionValue array = {data, num_items, true, null_signs}; - - CollectionValue array_data[] = {empty_array, array, empty_array, array}; - uint32_t num_arrays = sizeof(array_data) / sizeof(array_data[0]); - test_array_parser(column_pb, "[[], [1, null, 3], [], [1, null, 3]]", - {array_data, num_arrays, false, nullptr}); - bool array_null_signs[] = {false, true, true, false}; - test_array_parser(column_pb, "[[], null, null, [1, null, 3]]", - {array_data, num_arrays, true, array_null_signs}); -} - -TEST(ArrayParserTest, TestLargeIntArray) { - auto column_pb = create_column_pb("ARRAY", "LARGEINT"); - test_array_parser(column_pb, "[]", CollectionValue(0)); - - __int128_t data[] = {(1L << 31) - 1, (1LU << 63) - 1, (1LU << 63) | ((1LU << 63) - 1)}; - int num_items = sizeof(data) / sizeof(data[0]); - CollectionValue value(data, num_items, false, nullptr); - test_array_parser(column_pb, "[2147483647, 9223372036854775807, 18446744073709551615]", value); - - bool null_signs[] = {false, true, false}; - value.set_has_null(true); - value.set_null_signs(null_signs); - test_array_parser(column_pb, "[2147483647, null, 18446744073709551615]", value); - - data[1] = static_cast<__int128_t>(1) << 66; - null_signs[1] = false; - test_array_parser(column_pb, - "[\"2147483647\", \"73786976294838206464\", \"18446744073709551615\"]", - value); -} - -TEST(ArrayParserTest, TestDecimalArray) { - auto column_pb = create_column_pb("ARRAY", "DECIMAL"); - test_array_parser(column_pb, "[]", CollectionValue(0)); - - std::string literals[] = {"2147483647", "9223372036854775807"}; - uint32_t num_items = sizeof(literals) / sizeof(literals[0]); - decimal12_t data[num_items]; - for (int i = 0; i < num_items; ++i) { - auto decimal_value = DecimalV2Value(literals[i]); - data[i].integer = decimal_value.int_value(); - data[i].fraction = decimal_value.frac_value(); - } - CollectionValue value(data, num_items, false, nullptr); - test_array_parser(column_pb, "[2147483647, 9223372036854775807]", value); - - bool null_signs[] = {false, true}; - value.set_has_null(true); - value.set_null_signs(null_signs); - test_array_parser(column_pb, "[2147483647, null]", value); - - null_signs[1] = false; - test_array_parser(column_pb, "[\"2147483647\", \"9223372036854775807\"]", value); - - literals[0] = "2147483647.5"; - literals[1] = "34359738368.5"; - for (int i = 0; i < num_items; ++i) { - auto decimal_value = DecimalV2Value(literals[i]); - data[i].integer = decimal_value.int_value(); - data[i].fraction = decimal_value.frac_value(); - } - value = {data, num_items, false, nullptr}; - test_array_parser(column_pb, "[2147483647.5, \"34359738368.5\"]", value); -} - -TEST(ArrayParserTest, TestFreePool) { - auto column_pb = create_column_pb("ARRAY", "DECIMAL"); - MemPool mem_pool; - FunctionContext context; - ArrayUtils::prepare_context(context, mem_pool, column_pb); - int alignment = 1; - for (int i = 1; i <= 4; ++i) { - alignment <<= 1; - auto* p = context.aligned_allocate(alignment, alignment); - EXPECT_TRUE(reinterpret_cast(p) % alignment == 0); - p = context.aligned_allocate(alignment, alignment); - EXPECT_TRUE(reinterpret_cast(p) % alignment == 0); - } -} - -} // namespace doris