From 199d7d3be8dba15d607d7dbdb0d9defd1774ba0c Mon Sep 17 00:00:00 2001 From: ZhaoChangle Date: Sun, 22 Jan 2023 16:39:23 +0800 Subject: [PATCH] [Refactor]Merged string_value into string_ref (#15925) --- be/src/exec/arrow/parquet_reader.cpp | 15 +- be/src/exec/base_scanner.cpp | 8 +- be/src/exec/es/es_predicate.cpp | 4 +- be/src/exec/es/es_scroll_parser.cpp | 22 +- be/src/exec/es/es_scroll_parser.h | 2 +- be/src/exec/hash_table.cpp | 6 +- be/src/exec/olap_utils.h | 2 +- be/src/exec/schema_scanner.cpp | 2 +- .../schema_backends_scanner.cpp | 12 +- .../schema_charsets_scanner.cpp | 38 +-- .../schema_collations_scanner.cpp | 50 ++-- .../schema_scanner/schema_columns_scanner.cpp | 154 ++++++----- .../schema_scanner/schema_files_scanner.cpp | 58 ++--- .../schema_partitions_scanner.cpp | 28 +- .../schema_scanner/schema_rowsets_scanner.cpp | 12 +- .../schema_schema_privileges_scanner.cpp | 24 +- .../schema_schemata_scanner.cpp | 50 ++-- .../schema_statistics_scanner.cpp | 24 +- .../schema_table_privileges_scanner.cpp | 26 +- .../schema_scanner/schema_tables_scanner.cpp | 100 +++---- .../schema_user_privileges_scanner.cpp | 22 +- .../schema_variables_scanner.cpp | 26 +- .../schema_scanner/schema_views_scanner.cpp | 104 ++++---- be/src/exec/text_converter.cpp | 10 +- be/src/exec/text_converter.h | 4 +- be/src/exec/text_converter.hpp | 17 +- be/src/exprs/agg_fn_evaluator.cpp | 16 +- be/src/exprs/aggregate_functions.cpp | 24 +- be/src/exprs/anyval_util.h | 16 +- be/src/exprs/binary_predicate.cpp | 6 +- be/src/exprs/bitmap_function.cpp | 34 +-- be/src/exprs/bloomfilter_predicate.h | 16 +- be/src/exprs/create_predicate_function.h | 4 +- be/src/exprs/expr.h | 5 +- be/src/exprs/expr_context.cpp | 4 +- be/src/exprs/expr_value.h | 16 +- be/src/exprs/hybrid_set.h | 48 ++-- be/src/exprs/info_func.cpp | 6 +- be/src/exprs/like_predicate.cpp | 22 +- be/src/exprs/like_predicate.h | 6 +- be/src/exprs/literal.cpp | 6 +- be/src/exprs/match_predicate.cpp | 6 +- be/src/exprs/math_functions.cpp | 4 +- be/src/exprs/minmax_predicate.h | 14 +- be/src/exprs/new_agg_fn_evaluator.cc | 10 +- be/src/exprs/new_in_predicate.cpp | 6 +- be/src/exprs/operators.cpp | 4 +- be/src/exprs/rpc_fn.cpp | 12 +- be/src/exprs/runtime_filter.cpp | 34 +-- be/src/exprs/slot_ref.cpp | 2 +- be/src/exprs/string_functions.cpp | 31 ++- be/src/exprs/string_functions.h | 2 +- be/src/olap/aggregate_func.h | 2 +- be/src/olap/comparison_predicate.h | 20 +- be/src/olap/hll.cpp | 2 +- be/src/olap/in_list_predicate.h | 31 +-- be/src/olap/like_column_predicate.cpp | 18 +- be/src/olap/like_column_predicate.h | 25 +- be/src/olap/null_predicate.cpp | 2 +- be/src/olap/predicate_creator.h | 6 +- be/src/olap/rowset/segment_v2/bloom_filter.h | 2 +- .../olap/rowset/segment_v2/column_reader.cpp | 8 +- .../segment_v2/inverted_index_reader.cpp | 10 +- be/src/runtime/CMakeLists.txt | 1 - be/src/runtime/collection_value.cpp | 34 +-- be/src/runtime/fold_constant_executor.cpp | 2 +- be/src/runtime/primitive_type.cpp | 4 +- be/src/runtime/primitive_type.h | 10 +- be/src/runtime/raw_value.cpp | 54 ++-- be/src/runtime/raw_value.h | 38 +-- be/src/runtime/string_buffer.hpp | 40 +-- be/src/runtime/string_search.hpp | 31 +-- be/src/runtime/string_value.cpp | 57 ---- be/src/runtime/string_value.h | 211 --------------- be/src/runtime/string_value.hpp | 59 ----- be/src/runtime/tuple.cpp | 66 ++--- be/src/runtime/tuple.h | 12 +- be/src/runtime/type_limit.h | 8 +- be/src/udf/udf.h | 6 +- be/src/util/bitmap_intersect.h | 22 +- be/src/util/simd/vstring_function.h | 2 +- be/src/util/static_asserts.cpp | 8 +- be/src/util/url_parser.cpp | 102 ++++---- be/src/util/url_parser.h | 42 +-- be/src/vec/CMakeLists.txt | 1 + .../aggregate_function_orthogonal_bitmap.cpp | 2 +- be/src/vec/columns/column_dictionary.h | 56 ++-- be/src/vec/columns/predicate_column.h | 34 +-- be/src/vec/common/hash_table/hash.h | 3 +- be/src/vec/common/hash_table/hash_table.h | 2 +- .../common/hash_table/hash_table_key_holder.h | 6 +- .../vec/common/hash_table/string_hash_map.h | 26 +- .../vec/common/hash_table/string_hash_table.h | 27 +- be/src/vec/common/string_ref.cpp | 81 ++++++ be/src/vec/common/string_ref.h | 245 +++++++++++++----- be/src/vec/core/block.cpp | 38 +-- be/src/vec/core/types.h | 4 +- .../exec/data_gen_functions/vnumbers_tvf.cpp | 2 +- be/src/vec/exec/format/orc/vorc_reader.cpp | 4 +- .../exec/format/parquet/parquet_pred_cmp.h | 8 +- .../vec/exec/format/table/iceberg_reader.cpp | 2 +- be/src/vec/exec/scan/vscan_node.cpp | 10 +- be/src/vec/exec/vbroker_scan_node.cpp | 2 +- be/src/vec/exec/vbroker_scanner.cpp | 6 +- be/src/vec/exec/vdata_gen_scan_node.cpp | 1 - be/src/vec/exec/vjson_scanner.cpp | 12 +- be/src/vec/exec/vmysql_scan_node.cpp | 2 +- be/src/vec/exec/vschema_scan_node.cpp | 8 +- .../table_function/vexplode_json_array.h | 4 +- .../vec/exprs/table_function/vexplode_split.h | 2 +- be/src/vec/exprs/vbloom_predicate.cpp | 2 +- be/src/vec/exprs/vdirect_in_predicate.h | 2 +- be/src/vec/functions/function_regexp.cpp | 2 +- be/src/vec/functions/function_string.cpp | 6 +- be/src/vec/functions/function_string.h | 33 +-- be/src/vec/functions/like.cpp | 126 ++++----- be/src/vec/functions/like.h | 77 +++--- be/src/vec/io/io_helper.h | 4 +- be/src/vec/olap/vertical_merge_iterator.cpp | 7 +- be/src/vec/olap/vertical_merge_iterator.h | 4 +- be/src/vec/runtime/vfile_result_writer.cpp | 2 +- be/src/vec/runtime/vorc_writer.cpp | 26 +- be/test/exprs/bitmap_function_test.cpp | 3 +- be/test/exprs/bloom_filter_predicate_test.cpp | 24 +- be/test/exprs/hybrid_set_test.cpp | 34 +-- be/test/olap/block_column_predicate_test.cpp | 2 +- be/test/runtime/string_buffer_test.cpp | 2 +- be/test/runtime/string_value_test.cpp | 11 +- be/test/util/array_parser_test.cpp | 4 +- be/test/vec/core/block_test.cpp | 2 +- .../vec/exec/parquet/parquet_thrift_test.cpp | 18 +- gensrc/script/gen_functions.py | 2 +- gensrc/script/gen_vector_functions.py | 2 +- 133 files changed, 1485 insertions(+), 1607 deletions(-) delete mode 100644 be/src/runtime/string_value.cpp delete mode 100644 be/src/runtime/string_value.h delete mode 100644 be/src/runtime/string_value.hpp create mode 100644 be/src/vec/common/string_ref.cpp diff --git a/be/src/exec/arrow/parquet_reader.cpp b/be/src/exec/arrow/parquet_reader.cpp index c19e8e4269..ba2fe9098f 100644 --- a/be/src/exec/arrow/parquet_reader.cpp +++ b/be/src/exec/arrow/parquet_reader.cpp @@ -31,9 +31,9 @@ #include "io/file_reader.h" #include "runtime/descriptors.h" #include "runtime/mem_pool.h" -#include "runtime/string_value.h" #include "runtime/tuple.h" #include "util/string_util.h" +#include "vec/common/string_ref.h" namespace doris { @@ -118,15 +118,18 @@ Status ParquetReaderWrap::size(int64_t* size) { } } +// TODO: NEED TO REWRITE COMPLETELY. the way writing now is WRONG. +// StringRef shouldn't managing exclusive memory cause it will break RAII. +// besides, accessing object which is essentially const by non-const object +// is UB! inline void ParquetReaderWrap::fill_slot(Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, const uint8_t* value, int32_t len) { tuple->set_not_null(slot_desc->null_indicator_offset()); void* slot = tuple->get_slot(slot_desc->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - str_slot->ptr = reinterpret_cast(mem_pool->allocate(len)); - memcpy(str_slot->ptr, value, len); - str_slot->len = len; - return; + StringRef* str_slot = reinterpret_cast(slot); + str_slot->data = reinterpret_cast(mem_pool->allocate(len)); + memcpy(const_cast(str_slot->data), value, len); // ! + str_slot->size = len; } inline Status ParquetReaderWrap::set_field_null(Tuple* tuple, const SlotDescriptor* slot_desc) { diff --git a/be/src/exec/base_scanner.cpp b/be/src/exec/base_scanner.cpp index 7e6887c9b5..193860f286 100644 --- a/be/src/exec/base_scanner.cpp +++ b/be/src/exec/base_scanner.cpp @@ -222,7 +222,7 @@ Status BaseScanner::_fill_dest_tuple(Tuple* dest_tuple, MemPool* mem_pool) { }, [&]() -> std::string { // Type of the slot is must be Varchar in _src_tuple. - StringValue* raw_value = _src_tuple->get_string_slot( + StringRef* raw_value = _src_tuple->get_string_slot( _src_slot_descs_order_by_dest[dest_index]->tuple_offset()); std::string raw_string; if (raw_value != nullptr) { //is not null then get raw value @@ -418,10 +418,10 @@ void BaseScanner::fill_slots_of_columns_from_path( auto slot_desc = _src_slot_descs.at(i + start); _src_tuple->set_not_null(slot_desc->null_indicator_offset()); void* slot = _src_tuple->get_slot(slot_desc->tuple_offset()); - auto* str_slot = reinterpret_cast(slot); + auto* str_slot = reinterpret_cast(slot); const std::string& column_from_path = columns_from_path[i]; - str_slot->ptr = const_cast(column_from_path.c_str()); - str_slot->len = column_from_path.size(); + str_slot->data = column_from_path.c_str(); + str_slot->size = column_from_path.size(); } } diff --git a/be/src/exec/es/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp index 9773cc32c6..f3080ea86a 100644 --- a/be/src/exec/es/es_predicate.cpp +++ b/be/src/exec/es/es_predicate.cpp @@ -31,8 +31,8 @@ #include "exprs/in_predicate.h" #include "runtime/datetime_value.h" #include "runtime/large_int_value.h" -#include "runtime/string_value.h" #include "runtime/tuple_row.h" +#include "vec/common/string_ref.h" namespace doris { @@ -124,7 +124,7 @@ double ExtLiteral::get_double() { std::string ExtLiteral::get_string() { DCHECK(_type == TYPE_VARCHAR || _type == TYPE_CHAR || _type == TYPE_STRING); - return (reinterpret_cast(_value))->to_string(); + return (reinterpret_cast(_value))->to_string(); } std::string ExtLiteral::get_date_string() { diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index 7170057ac2..46eced5d08 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -29,8 +29,8 @@ #include "rapidjson/writer.h" #include "runtime/mem_pool.h" #include "runtime/memory/mem_tracker.h" -#include "runtime/string_value.h" #include "util/string_parser.hpp" +#include "vec/common/string_ref.h" #include "vec/runtime/vdatetime_value.h" namespace doris { @@ -256,9 +256,9 @@ static Status insert_int_value(const rapidjson::Value& col, PrimitiveType type, return Status::OK(); } -ScrollParser::ScrollParser(bool doc_value_mode) : _scroll_id(""), _size(0), _line_index(0) {} +ScrollParser::ScrollParser(bool doc_value_mode) : _size(0), _line_index(0) {} -ScrollParser::~ScrollParser() {} +ScrollParser::~ScrollParser() = default; Status ScrollParser::parse(const std::string& scroll_result, bool exactly_once) { // rely on `_size !=0 ` to determine whether scroll ends @@ -294,7 +294,7 @@ Status ScrollParser::parse(const std::string& scroll_result, bool exactly_once) return Status::OK(); } -int ScrollParser::get_size() { +int ScrollParser::get_size() const { return _size; } @@ -325,7 +325,7 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple, if (!slot_desc->is_materialized()) { continue; } - // _id field must exists in every document, this is guaranteed by ES + // _id field must exist in every document, this is guaranteed by ES // if _id was found in tuple, we would get `_id` value from inner-hit node // json-format response would like below: // "hits": { @@ -355,8 +355,8 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple, RETURN_LIMIT_EXCEEDED(nullptr, details, len); } memcpy(buffer, _id.data(), len); - reinterpret_cast(slot)->ptr = buffer; - reinterpret_cast(slot)->len = len; + reinterpret_cast(slot)->data = buffer; + reinterpret_cast(slot)->size = len; continue; } @@ -389,7 +389,7 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple, case TYPE_STRING: { // sometimes elasticsearch user post some not-string value to Elasticsearch Index. // because of reading value from _source, we can not process all json type and then just transfer the value to original string representation - // this may be a tricky, but we can workaround this issue + // this may be a tricky, but we can work around this issue std::string val; if (pure_doc_value) { if (!col[0].IsString()) { @@ -413,8 +413,8 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple, RETURN_LIMIT_EXCEEDED(nullptr, details, val_size); } memcpy(buffer, val.data(), val_size); - reinterpret_cast(slot)->ptr = buffer; - reinterpret_cast(slot)->len = val_size; + reinterpret_cast(slot)->data = buffer; + reinterpret_cast(slot)->size = val_size; break; } @@ -638,7 +638,7 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, case TYPE_STRING: { // sometimes elasticsearch user post some not-string value to Elasticsearch Index. // because of reading value from _source, we can not process all json type and then just transfer the value to original string representation - // this may be a tricky, but we can workaround this issue + // this may be a tricky, but we can work around this issue std::string val; if (pure_doc_value) { if (!col[0].IsString()) { diff --git a/be/src/exec/es/es_scroll_parser.h b/be/src/exec/es/es_scroll_parser.h index 561150ac04..f3c7af70fc 100644 --- a/be/src/exec/es/es_scroll_parser.h +++ b/be/src/exec/es/es_scroll_parser.h @@ -41,7 +41,7 @@ public: bool* line_eof, const std::map& docvalue_context); const std::string& get_scroll_id(); - int get_size(); + int get_size() const; private: // helper method for processing date/datetime cols with rapidjson::Value diff --git a/be/src/exec/hash_table.cpp b/be/src/exec/hash_table.cpp index 4bb795954a..6db56ba2ff 100644 --- a/be/src/exec/hash_table.cpp +++ b/be/src/exec/hash_table.cpp @@ -132,11 +132,11 @@ uint32_t HashTable::hash_variable_len_row() { if (_expr_value_null_bits[i]) { // Hash the null random seed values at 'loc' - hash = HashUtil::hash(loc, sizeof(StringValue), hash); + hash = HashUtil::hash(loc, sizeof(StringRef), hash); } else { // Hash the string - StringValue* str = reinterpret_cast(loc); - hash = HashUtil::hash(str->ptr, str->len, hash); + StringRef* str = reinterpret_cast(loc); + hash = HashUtil::hash(str->data, str->size, hash); } } } diff --git a/be/src/exec/olap_utils.h b/be/src/exec/olap_utils.h index 6f1dca46e2..e28dcf0a2b 100644 --- a/be/src/exec/olap_utils.h +++ b/be/src/exec/olap_utils.h @@ -70,7 +70,7 @@ inline CompareLargeFunc get_compare_func(PrimitiveType type) { case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_STRING: - return compare_large; + return compare_large; default: DCHECK(false) << "Unsupported Compare type"; diff --git a/be/src/exec/schema_scanner.cpp b/be/src/exec/schema_scanner.cpp index a452077f6c..4f2d029df7 100644 --- a/be/src/exec/schema_scanner.cpp +++ b/be/src/exec/schema_scanner.cpp @@ -33,7 +33,7 @@ #include "exec/schema_scanner/schema_variables_scanner.h" #include "exec/schema_scanner/schema_views_scanner.h" #include "runtime/define_primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { diff --git a/be/src/exec/schema_scanner/schema_backends_scanner.cpp b/be/src/exec/schema_scanner/schema_backends_scanner.cpp index 50684ebbcb..9c1b0c015a 100644 --- a/be/src/exec/schema_scanner/schema_backends_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_backends_scanner.cpp @@ -27,8 +27,8 @@ #include "runtime/define_primitive_type.h" #include "runtime/exec_env.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" #include "util/thrift_rpc_helper.h" +#include "vec/common/string_ref.h" namespace doris { @@ -87,12 +87,12 @@ Status SchemaBackendsScanner::_fill_one_col(Tuple* tuple, MemPool* pool, size_t *(reinterpret_cast(slot)) = _batch_data[_row_idx].column_value[col_idx].intVal; } else if (it->second == TYPE_VARCHAR) { void* slot = tuple->get_slot(_tuple_desc->slots()[col_idx]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - str_slot->ptr = + StringRef* str_slot = reinterpret_cast(slot); + str_slot->data = (char*)pool->allocate(_batch_data[_row_idx].column_value[col_idx].stringVal.size()); - str_slot->len = _batch_data[_row_idx].column_value[col_idx].stringVal.size(); - memcpy(str_slot->ptr, _batch_data[_row_idx].column_value[col_idx].stringVal.c_str(), - str_slot->len); + str_slot->size = _batch_data[_row_idx].column_value[col_idx].stringVal.size(); + memcpy(const_cast(str_slot->data), + _batch_data[_row_idx].column_value[col_idx].stringVal.c_str(), str_slot->size); } else if (it->second == TYPE_DOUBLE) { void* slot = tuple->get_slot(_tuple_desc->slots()[col_idx]->tuple_offset()); *(reinterpret_cast(slot)) = diff --git a/be/src/exec/schema_scanner/schema_charsets_scanner.cpp b/be/src/exec/schema_scanner/schema_charsets_scanner.cpp index 27772c85bf..0a96db94c4 100644 --- a/be/src/exec/schema_scanner/schema_charsets_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_charsets_scanner.cpp @@ -17,15 +17,15 @@ #include "exec/schema_scanner/schema_charsets_scanner.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaCharsetsScanner::_s_css_columns[] = { // name, type, size - {"CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"DEFAULT_COLLATE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"DESCRIPTION", TYPE_VARCHAR, sizeof(StringValue), false}, + {"CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"DEFAULT_COLLATE_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"DESCRIPTION", TYPE_VARCHAR, sizeof(StringRef), false}, {"MAXLEN", TYPE_BIGINT, sizeof(int64_t), false}, }; @@ -44,38 +44,38 @@ Status SchemaCharsetsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // variables names { void* slot = tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); int len = strlen(_s_charsets[_index].charset); - str_slot->ptr = (char*)pool->allocate(len + 1); - if (nullptr == str_slot->ptr) { + str_slot->data = (char*)pool->allocate(len + 1); + if (nullptr == str_slot->data) { return Status::InternalError("No Memory."); } - memcpy(str_slot->ptr, _s_charsets[_index].charset, len + 1); - str_slot->len = len; + memcpy(const_cast(str_slot->data), _s_charsets[_index].charset, len + 1); + str_slot->size = len; } // DEFAULT_COLLATE_NAME { void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); int len = strlen(_s_charsets[_index].default_collation); - str_slot->ptr = (char*)pool->allocate(len + 1); - if (nullptr == str_slot->ptr) { + str_slot->data = (char*)pool->allocate(len + 1); + if (nullptr == str_slot->data) { return Status::InternalError("No Memory."); } - memcpy(str_slot->ptr, _s_charsets[_index].default_collation, len + 1); - str_slot->len = len; + memcpy(const_cast(str_slot->data), _s_charsets[_index].default_collation, len + 1); + str_slot->size = len; } // DESCRIPTION { void* slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); int len = strlen(_s_charsets[_index].description); - str_slot->ptr = (char*)pool->allocate(len + 1); - if (nullptr == str_slot->ptr) { + str_slot->data = (char*)pool->allocate(len + 1); + if (nullptr == str_slot->data) { return Status::InternalError("No Memory."); } - memcpy(str_slot->ptr, _s_charsets[_index].description, len + 1); - str_slot->len = len; + memcpy(const_cast(str_slot->data), _s_charsets[_index].description, len + 1); + str_slot->size = len; } // maxlen { diff --git a/be/src/exec/schema_scanner/schema_collations_scanner.cpp b/be/src/exec/schema_scanner/schema_collations_scanner.cpp index 304eec82b1..3201deb68b 100644 --- a/be/src/exec/schema_scanner/schema_collations_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_collations_scanner.cpp @@ -18,17 +18,17 @@ #include "exec/schema_scanner/schema_collations_scanner.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaCollationsScanner::_s_cols_columns[] = { // name, type, size - {"COLLATION_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"COLLATION_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, {"ID", TYPE_BIGINT, sizeof(int64_t), false}, - {"IS_DEFAULT", TYPE_VARCHAR, sizeof(StringValue), false}, - {"IS_COMPILED", TYPE_VARCHAR, sizeof(StringValue), false}, + {"IS_DEFAULT", TYPE_VARCHAR, sizeof(StringRef), false}, + {"IS_COMPILED", TYPE_VARCHAR, sizeof(StringRef), false}, {"SORTLEN", TYPE_BIGINT, sizeof(int64_t), false}, }; @@ -48,26 +48,26 @@ Status SchemaCollationsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // COLLATION_NAME { void* slot = tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); int len = strlen(_s_collations[_index].name); - str_slot->ptr = (char*)pool->allocate(len + 1); - if (nullptr == str_slot->ptr) { + str_slot->data = (char*)pool->allocate(len + 1); + if (nullptr == str_slot->data) { return Status::InternalError("No Memory."); } - memcpy(str_slot->ptr, _s_collations[_index].name, len + 1); - str_slot->len = len; + memcpy(const_cast(str_slot->data), _s_collations[_index].name, len + 1); + str_slot->size = len; } // charset { void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); int len = strlen(_s_collations[_index].charset); - str_slot->ptr = (char*)pool->allocate(len + 1); - if (nullptr == str_slot->ptr) { + str_slot->data = (char*)pool->allocate(len + 1); + if (nullptr == str_slot->data) { return Status::InternalError("No Memory."); } - memcpy(str_slot->ptr, _s_collations[_index].charset, len + 1); - str_slot->len = len; + memcpy(const_cast(str_slot->data), _s_collations[_index].charset, len + 1); + str_slot->size = len; } // id { @@ -77,26 +77,26 @@ Status SchemaCollationsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // is_default { void* slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); int len = strlen(_s_collations[_index].is_default); - str_slot->ptr = (char*)pool->allocate(len + 1); - if (nullptr == str_slot->ptr) { + str_slot->data = (char*)pool->allocate(len + 1); + if (nullptr == str_slot->data) { return Status::InternalError("No Memory."); } - memcpy(str_slot->ptr, _s_collations[_index].is_default, len + 1); - str_slot->len = len; + memcpy(const_cast(str_slot->data), _s_collations[_index].is_default, len + 1); + str_slot->size = len; } // IS_COMPILED { void* slot = tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); int len = strlen(_s_collations[_index].is_compile); - str_slot->ptr = (char*)pool->allocate(len + 1); - if (nullptr == str_slot->ptr) { + str_slot->data = (char*)pool->allocate(len + 1); + if (nullptr == str_slot->data) { return Status::InternalError("No Memory."); } - memcpy(str_slot->ptr, _s_collations[_index].is_compile, len + 1); - str_slot->len = len; + memcpy(const_cast(str_slot->data), _s_collations[_index].is_compile, len + 1); + str_slot->size = len; } // sortlen { diff --git a/be/src/exec/schema_scanner/schema_columns_scanner.cpp b/be/src/exec/schema_scanner/schema_columns_scanner.cpp index fcf78542bf..60a133b6d0 100644 --- a/be/src/exec/schema_scanner/schema_columns_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_columns_scanner.cpp @@ -21,35 +21,35 @@ #include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaColumnsScanner::_s_col_columns[] = { // name, type, size, is_null - {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, - {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"COLUMN_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringRef), false}, + {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"COLUMN_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, {"ORDINAL_POSITION", TYPE_BIGINT, sizeof(int64_t), false}, - {"COLUMN_DEFAULT", TYPE_VARCHAR, sizeof(StringValue), true}, - {"IS_NULLABLE", TYPE_VARCHAR, sizeof(StringValue), false}, - {"DATA_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, + {"COLUMN_DEFAULT", TYPE_VARCHAR, sizeof(StringRef), true}, + {"IS_NULLABLE", TYPE_VARCHAR, sizeof(StringRef), false}, + {"DATA_TYPE", TYPE_VARCHAR, sizeof(StringRef), false}, {"CHARACTER_MAXIMUM_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, {"CHARACTER_OCTET_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, {"NUMERIC_PRECISION", TYPE_BIGINT, sizeof(int64_t), true}, {"NUMERIC_SCALE", TYPE_BIGINT, sizeof(int64_t), true}, {"DATETIME_PRECISION", TYPE_BIGINT, sizeof(int64_t), true}, - {"CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringValue), true}, - {"COLLATION_NAME", TYPE_VARCHAR, sizeof(StringValue), true}, - {"COLUMN_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, - {"COLUMN_KEY", TYPE_VARCHAR, sizeof(StringValue), false}, - {"EXTRA", TYPE_VARCHAR, sizeof(StringValue), false}, - {"PRIVILEGES", TYPE_VARCHAR, sizeof(StringValue), false}, - {"COLUMN_COMMENT", TYPE_VARCHAR, sizeof(StringValue), false}, + {"CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringRef), true}, + {"COLLATION_NAME", TYPE_VARCHAR, sizeof(StringRef), true}, + {"COLUMN_TYPE", TYPE_VARCHAR, sizeof(StringRef), false}, + {"COLUMN_KEY", TYPE_VARCHAR, sizeof(StringRef), false}, + {"EXTRA", TYPE_VARCHAR, sizeof(StringRef), false}, + {"PRIVILEGES", TYPE_VARCHAR, sizeof(StringRef), false}, + {"COLUMN_COMMENT", TYPE_VARCHAR, sizeof(StringRef), false}, {"COLUMN_SIZE", TYPE_BIGINT, sizeof(int64_t), true}, {"DECIMAL_DIGITS", TYPE_BIGINT, sizeof(int64_t), true}, - {"GENERATION_EXPRESSION", TYPE_VARCHAR, sizeof(StringValue), true}, + {"GENERATION_EXPRESSION", TYPE_VARCHAR, sizeof(StringRef), true}, {"SRS_ID", TYPE_BIGINT, sizeof(int64_t), true}, }; @@ -59,7 +59,7 @@ SchemaColumnsScanner::SchemaColumnsScanner() _table_index(0), _column_index(0) {} -SchemaColumnsScanner::~SchemaColumnsScanner() {} +SchemaColumnsScanner::~SchemaColumnsScanner() = default; Status SchemaColumnsScanner::start(RuntimeState* state) { if (!_is_init) { @@ -218,6 +218,9 @@ std::string SchemaColumnsScanner::type_to_string(TColumnDesc& desc) { } } +// TODO: ALL schema algorithm like these need to be refactor to avoid UB and +// keep StringRef semantic to be kept. + //fill row in the "INFORMATION_SCHEMA COLUMNS" //Reference from https://dev.mysql.com/doc/refman/8.0/en/information-schema-columns-table.html Status SchemaColumnsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { @@ -230,39 +233,43 @@ Status SchemaColumnsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { tuple->set_null(_tuple_desc->slots()[0]->null_indicator_offset()); } else { void* slot = tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); + // todo: we may change all of StringRef in similar usage + // to Slice someday to distinguish different purposes of use. + // or just merge them. std::string catalog_name = _db_result.catalogs[_db_index - 1]; - str_slot->ptr = (char*)pool->allocate(catalog_name.size()); - str_slot->len = catalog_name.size(); - memcpy(str_slot->ptr, catalog_name.c_str(), str_slot->len); + str_slot->data = (char*)pool->allocate(catalog_name.size()); + str_slot->size = catalog_name.size(); + memcpy(const_cast(str_slot->data), catalog_name.c_str(), str_slot->size); } } // TABLE_SCHEMA { void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); std::string db_name = SchemaHelper::extract_db_name(_db_result.dbs[_db_index - 1]); - str_slot->ptr = (char*)pool->allocate(db_name.size()); - str_slot->len = db_name.size(); - memcpy(str_slot->ptr, db_name.c_str(), str_slot->len); + str_slot->data = (char*)pool->allocate(db_name.size()); + str_slot->size = db_name.size(); + memcpy(const_cast(str_slot->data), db_name.c_str(), str_slot->size); } // TABLE_NAME { void* slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - str_slot->ptr = (char*)pool->allocate(_table_result.tables[_table_index - 1].length()); - str_slot->len = _table_result.tables[_table_index - 1].length(); - memcpy(str_slot->ptr, _table_result.tables[_table_index - 1].c_str(), str_slot->len); + StringRef* str_slot = reinterpret_cast(slot); + str_slot->data = (char*)pool->allocate(_table_result.tables[_table_index - 1].length()); + str_slot->size = _table_result.tables[_table_index - 1].length(); + memcpy(const_cast(str_slot->data), _table_result.tables[_table_index - 1].c_str(), + str_slot->size); } // COLUMN_NAME { void* slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - str_slot->ptr = (char*)pool->allocate( + StringRef* str_slot = reinterpret_cast(slot); + str_slot->data = (char*)pool->allocate( _desc_result.columns[_column_index].columnDesc.columnName.length()); - str_slot->len = _desc_result.columns[_column_index].columnDesc.columnName.length(); - memcpy(str_slot->ptr, _desc_result.columns[_column_index].columnDesc.columnName.c_str(), - str_slot->len); + str_slot->size = _desc_result.columns[_column_index].columnDesc.columnName.length(); + memcpy(const_cast(str_slot->data), + _desc_result.columns[_column_index].columnDesc.columnName.c_str(), str_slot->size); } // ORDINAL_POSITION { @@ -275,33 +282,33 @@ Status SchemaColumnsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // IS_NULLABLE { void* slot = tuple->get_slot(_tuple_desc->slots()[6]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); if (_desc_result.columns[_column_index].columnDesc.__isset.isAllowNull) { if (_desc_result.columns[_column_index].columnDesc.isAllowNull) { - str_slot->len = strlen("YES"); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - memcpy(str_slot->ptr, "YES", str_slot->len); + str_slot->size = strlen("YES"); + str_slot->data = (char*)pool->allocate(str_slot->size); + memcpy(const_cast(str_slot->data), "YES", str_slot->size); } else { - str_slot->len = strlen("NO"); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - memcpy(str_slot->ptr, "NO", str_slot->len); + str_slot->size = strlen("NO"); + str_slot->data = (char*)pool->allocate(str_slot->size); + memcpy(const_cast(str_slot->data), "NO", str_slot->size); } } else { - str_slot->len = strlen("NO"); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - memcpy(str_slot->ptr, "NO", str_slot->len); + str_slot->size = strlen("NO"); + str_slot->data = (char*)pool->allocate(str_slot->size); + memcpy(const_cast(str_slot->data), "NO", str_slot->size); } } // DATA_TYPE { void* slot = tuple->get_slot(_tuple_desc->slots()[7]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); std::string buffer = to_mysql_data_type_string(_desc_result.columns[_column_index].columnDesc); - str_slot->len = buffer.length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - memcpy(str_slot->ptr, buffer.c_str(), str_slot->len); + str_slot->size = buffer.length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + memcpy(const_cast(str_slot->data), buffer.c_str(), str_slot->size); } // CHARACTER_MAXIMUM_LENGTH // For string columns, the maximum length in characters. @@ -366,51 +373,54 @@ Status SchemaColumnsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // COLUMN_TYPE { void* slot = tuple->get_slot(_tuple_desc->slots()[15]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); std::string buffer = type_to_string(_desc_result.columns[_column_index].columnDesc); - str_slot->len = buffer.length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - memcpy(str_slot->ptr, buffer.c_str(), str_slot->len); + str_slot->size = buffer.length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + memcpy(const_cast(str_slot->data), buffer.c_str(), str_slot->size); } // COLUMN_KEY { void* slot = tuple->get_slot(_tuple_desc->slots()[16]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); if (_desc_result.columns[_column_index].columnDesc.__isset.columnKey) { - str_slot->len = _desc_result.columns[_column_index].columnDesc.columnKey.length(); - str_slot->ptr = (char*)pool->allocate( + str_slot->size = _desc_result.columns[_column_index].columnDesc.columnKey.length(); + str_slot->data = (char*)pool->allocate( _desc_result.columns[_column_index].columnDesc.columnKey.length()); - memcpy(str_slot->ptr, _desc_result.columns[_column_index].columnDesc.columnKey.c_str(), - str_slot->len); + memcpy(const_cast(str_slot->data), + _desc_result.columns[_column_index].columnDesc.columnKey.c_str(), + str_slot->size); } else { - str_slot->len = strlen("") + 1; - str_slot->ptr = (char*)pool->allocate(str_slot->len); - memcpy(str_slot->ptr, "", str_slot->len); + str_slot->size = strlen("") + 1; + str_slot->data = (char*)pool->allocate(str_slot->size); + memcpy(const_cast(str_slot->data), "", str_slot->size); } } // EXTRA { void* slot = tuple->get_slot(_tuple_desc->slots()[17]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - str_slot->len = strlen("") + 1; - str_slot->ptr = (char*)pool->allocate(str_slot->len); - memcpy(str_slot->ptr, "", str_slot->len); + StringRef* str_slot = reinterpret_cast(slot); + str_slot->size = strlen("") + 1; + str_slot->data = (char*)pool->allocate(str_slot->size); + memcpy(const_cast(str_slot->data), "", str_slot->size); } // PRIVILEGES { void* slot = tuple->get_slot(_tuple_desc->slots()[18]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - str_slot->len = strlen("") + 1; - str_slot->ptr = (char*)pool->allocate(str_slot->len); - memcpy(str_slot->ptr, "", str_slot->len); + StringRef* str_slot = reinterpret_cast(slot); + str_slot->size = strlen("") + 1; + str_slot->data = (char*)pool->allocate(str_slot->size); + memcpy(const_cast(str_slot->data), "", str_slot->size); } // COLUMN_COMMENT { void* slot = tuple->get_slot(_tuple_desc->slots()[19]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - str_slot->ptr = (char*)pool->allocate(_desc_result.columns[_column_index].comment.length()); - str_slot->len = _desc_result.columns[_column_index].comment.length(); - memcpy(str_slot->ptr, _desc_result.columns[_column_index].comment.c_str(), str_slot->len); + StringRef* str_slot = reinterpret_cast(slot); + str_slot->data = + (char*)pool->allocate(_desc_result.columns[_column_index].comment.length()); + str_slot->size = _desc_result.columns[_column_index].comment.length(); + memcpy(const_cast(str_slot->data), + _desc_result.columns[_column_index].comment.c_str(), str_slot->size); } // COLUMN_SIZE { diff --git a/be/src/exec/schema_scanner/schema_files_scanner.cpp b/be/src/exec/schema_scanner/schema_files_scanner.cpp index d237d3da6e..c9a6b9721d 100644 --- a/be/src/exec/schema_scanner/schema_files_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_files_scanner.cpp @@ -19,50 +19,50 @@ #include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaFilesScanner::_s_tbls_columns[] = { // name, type, size, is_null {"FILE_ID", TYPE_BIGINT, sizeof(int64_t), true}, - {"FILE_NAME", TYPE_STRING, sizeof(StringValue), true}, - {"FILE_TYPE", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLESPACE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"TABLE_CATALOG", TYPE_CHAR, sizeof(StringValue), false}, - {"TABLE_SCHEMA", TYPE_STRING, sizeof(StringValue), true}, - {"TABLE_NAME", TYPE_STRING, sizeof(StringValue), true}, - {"LOGFILE_GROUP_NAME", TYPE_VARCHAR, sizeof(StringValue), true}, + {"FILE_NAME", TYPE_STRING, sizeof(StringRef), true}, + {"FILE_TYPE", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLESPACE_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"TABLE_CATALOG", TYPE_CHAR, sizeof(StringRef), false}, + {"TABLE_SCHEMA", TYPE_STRING, sizeof(StringRef), true}, + {"TABLE_NAME", TYPE_STRING, sizeof(StringRef), true}, + {"LOGFILE_GROUP_NAME", TYPE_VARCHAR, sizeof(StringRef), true}, {"LOGFILE_GROUP_NUMBER", TYPE_BIGINT, sizeof(int64_t), true}, - {"ENGINE", TYPE_VARCHAR, sizeof(StringValue), false}, - {"FULLTEXT_KEYS", TYPE_STRING, sizeof(StringValue), true}, - {"DELETED_ROWS", TYPE_STRING, sizeof(StringValue), true}, - {"UPDATE_COUNT", TYPE_STRING, sizeof(StringValue), true}, + {"ENGINE", TYPE_VARCHAR, sizeof(StringRef), false}, + {"FULLTEXT_KEYS", TYPE_STRING, sizeof(StringRef), true}, + {"DELETED_ROWS", TYPE_STRING, sizeof(StringRef), true}, + {"UPDATE_COUNT", TYPE_STRING, sizeof(StringRef), true}, {"FREE_EXTENTS", TYPE_BIGINT, sizeof(int64_t), true}, {"TOTAL_EXTENTS", TYPE_BIGINT, sizeof(int64_t), true}, {"EXTENT_SIZE", TYPE_BIGINT, sizeof(int64_t), true}, {"INITIAL_SIZE", TYPE_BIGINT, sizeof(int64_t), true}, {"MAXIMUM_SIZE", TYPE_BIGINT, sizeof(int64_t), true}, {"AUTOEXTEND_SIZE", TYPE_BIGINT, sizeof(int64_t), true}, - {"CREATION_TIME", TYPE_STRING, sizeof(StringValue), true}, - {"LAST_UPDATE_TIME", TYPE_STRING, sizeof(StringValue), true}, - {"LAST_ACCESS_TIME", TYPE_STRING, sizeof(StringValue), true}, - {"RECOVER_TIME", TYPE_STRING, sizeof(StringValue), true}, - {"TRANSACTION_COUNTER", TYPE_STRING, sizeof(StringValue), true}, + {"CREATION_TIME", TYPE_STRING, sizeof(StringRef), true}, + {"LAST_UPDATE_TIME", TYPE_STRING, sizeof(StringRef), true}, + {"LAST_ACCESS_TIME", TYPE_STRING, sizeof(StringRef), true}, + {"RECOVER_TIME", TYPE_STRING, sizeof(StringRef), true}, + {"TRANSACTION_COUNTER", TYPE_STRING, sizeof(StringRef), true}, {"VERSION", TYPE_BIGINT, sizeof(int64_t), true}, - {"ROW_FORMAT", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLE_ROWS", TYPE_STRING, sizeof(StringValue), true}, - {"AVG_ROW_LENGTH", TYPE_STRING, sizeof(StringValue), true}, - {"DATA_LENGTH", TYPE_STRING, sizeof(StringValue), true}, - {"MAX_DATA_LENGTH", TYPE_STRING, sizeof(StringValue), true}, - {"INDEX_LENGTH", TYPE_STRING, sizeof(StringValue), true}, + {"ROW_FORMAT", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLE_ROWS", TYPE_STRING, sizeof(StringRef), true}, + {"AVG_ROW_LENGTH", TYPE_STRING, sizeof(StringRef), true}, + {"DATA_LENGTH", TYPE_STRING, sizeof(StringRef), true}, + {"MAX_DATA_LENGTH", TYPE_STRING, sizeof(StringRef), true}, + {"INDEX_LENGTH", TYPE_STRING, sizeof(StringRef), true}, {"DATA_FREE", TYPE_BIGINT, sizeof(int64_t), true}, - {"CREATE_TIME", TYPE_STRING, sizeof(StringValue), true}, - {"UPDATE_TIME", TYPE_STRING, sizeof(StringValue), true}, - {"CHECK_TIME", TYPE_STRING, sizeof(StringValue), true}, - {"CHECKSUM", TYPE_STRING, sizeof(StringValue), true}, - {"STATUS", TYPE_VARCHAR, sizeof(StringValue), true}, - {"EXTRA", TYPE_VARCHAR, sizeof(StringValue), true}, + {"CREATE_TIME", TYPE_STRING, sizeof(StringRef), true}, + {"UPDATE_TIME", TYPE_STRING, sizeof(StringRef), true}, + {"CHECK_TIME", TYPE_STRING, sizeof(StringRef), true}, + {"CHECKSUM", TYPE_STRING, sizeof(StringRef), true}, + {"STATUS", TYPE_VARCHAR, sizeof(StringRef), true}, + {"EXTRA", TYPE_VARCHAR, sizeof(StringRef), true}, }; SchemaFilesScanner::SchemaFilesScanner() diff --git a/be/src/exec/schema_scanner/schema_partitions_scanner.cpp b/be/src/exec/schema_scanner/schema_partitions_scanner.cpp index 018e86ede5..71c10ce193 100644 --- a/be/src/exec/schema_scanner/schema_partitions_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_partitions_scanner.cpp @@ -20,24 +20,24 @@ #include "exec/schema_scanner/schema_helper.h" #include "runtime/datetime_value.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaPartitionsScanner::_s_tbls_columns[] = { // name, type, size, is_null - {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"PARTITION_NAME", TYPE_VARCHAR, sizeof(StringValue), true}, - {"SUBPARTITION_NAME", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"PARTITION_NAME", TYPE_VARCHAR, sizeof(StringRef), true}, + {"SUBPARTITION_NAME", TYPE_VARCHAR, sizeof(StringRef), true}, {"PARTITION_ORDINAL_POSITION", TYPE_INT, sizeof(int32_t), true}, {"SUBPARTITION_ORDINAL_POSITION", TYPE_INT, sizeof(int32_t), true}, - {"PARTITION_METHOD", TYPE_VARCHAR, sizeof(StringValue), true}, - {"SUBPARTITION_METHOD", TYPE_VARCHAR, sizeof(StringValue), true}, - {"PARTITION_EXPRESSION", TYPE_VARCHAR, sizeof(StringValue), true}, - {"SUBPARTITION_EXPRESSION", TYPE_VARCHAR, sizeof(StringValue), true}, - {"PARTITION_DESCRIPTION", TYPE_STRING, sizeof(StringValue), true}, + {"PARTITION_METHOD", TYPE_VARCHAR, sizeof(StringRef), true}, + {"SUBPARTITION_METHOD", TYPE_VARCHAR, sizeof(StringRef), true}, + {"PARTITION_EXPRESSION", TYPE_VARCHAR, sizeof(StringRef), true}, + {"SUBPARTITION_EXPRESSION", TYPE_VARCHAR, sizeof(StringRef), true}, + {"PARTITION_DESCRIPTION", TYPE_STRING, sizeof(StringRef), true}, {"TABLE_ROWS", TYPE_BIGINT, sizeof(int64_t), true}, {"AVG_ROW_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, {"DATA_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, @@ -48,9 +48,9 @@ SchemaScanner::ColumnDesc SchemaPartitionsScanner::_s_tbls_columns[] = { {"UPDATE_TIME", TYPE_DATETIME, sizeof(DateTimeValue), true}, {"CHECK_TIME", TYPE_DATETIME, sizeof(DateTimeValue), true}, {"CHECKSUM", TYPE_BIGINT, sizeof(int64_t), true}, - {"PARTITION_COMMENT", TYPE_STRING, sizeof(StringValue), false}, - {"NODEGROUP", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLESPACE_NAME", TYPE_VARCHAR, sizeof(StringValue), true}, + {"PARTITION_COMMENT", TYPE_STRING, sizeof(StringRef), false}, + {"NODEGROUP", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLESPACE_NAME", TYPE_VARCHAR, sizeof(StringRef), true}, }; SchemaPartitionsScanner::SchemaPartitionsScanner() diff --git a/be/src/exec/schema_scanner/schema_rowsets_scanner.cpp b/be/src/exec/schema_scanner/schema_rowsets_scanner.cpp index 00314b8306..800be0c1aa 100644 --- a/be/src/exec/schema_scanner/schema_rowsets_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_rowsets_scanner.cpp @@ -29,12 +29,12 @@ #include "olap/tablet.h" #include "runtime/descriptors.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaRowsetsScanner::_s_tbls_columns[] = { // name, type, size, is_null {"BACKEND_ID", TYPE_BIGINT, sizeof(int64_t), true}, - {"ROWSET_ID", TYPE_VARCHAR, sizeof(StringValue), true}, + {"ROWSET_ID", TYPE_VARCHAR, sizeof(StringRef), true}, {"TABLET_ID", TYPE_BIGINT, sizeof(int64_t), true}, {"ROWSET_NUM_ROWS", TYPE_BIGINT, sizeof(int64_t), true}, {"TXN_ID", TYPE_BIGINT, sizeof(int64_t), true}, @@ -109,11 +109,11 @@ Status SchemaRowsetsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // ROWSET_ID { void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); std::string rowset_id = rowset->rowset_id().to_string(); - str_slot->ptr = (char*)pool->allocate(rowset_id.size()); - str_slot->len = rowset_id.size(); - memcpy(str_slot->ptr, rowset_id.c_str(), str_slot->len); + str_slot->data = (char*)pool->allocate(rowset_id.size()); + str_slot->size = rowset_id.size(); + memcpy(const_cast(str_slot->data), rowset_id.c_str(), str_slot->size); } // TABLET_ID { diff --git a/be/src/exec/schema_scanner/schema_schema_privileges_scanner.cpp b/be/src/exec/schema_scanner/schema_schema_privileges_scanner.cpp index 196f796d36..6b082ba82a 100644 --- a/be/src/exec/schema_scanner/schema_schema_privileges_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_schema_privileges_scanner.cpp @@ -19,17 +19,17 @@ #include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaSchemaPrivilegesScanner::_s_tbls_columns[] = { // name, type, size, is_null - {"GRANTEE", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, - {"PRIVILEGE_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, - {"IS_GRANTABLE", TYPE_VARCHAR, sizeof(StringValue), true}, + {"GRANTEE", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringRef), false}, + {"PRIVILEGE_TYPE", TYPE_VARCHAR, sizeof(StringRef), false}, + {"IS_GRANTABLE", TYPE_VARCHAR, sizeof(StringRef), true}, }; SchemaSchemaPrivilegesScanner::SchemaSchemaPrivilegesScanner() @@ -102,13 +102,13 @@ Status SchemaSchemaPrivilegesScanner::fill_one_col(const std::string* src, MemPo if (nullptr == slot || nullptr == pool || nullptr == src) { return Status::InternalError("input pointer is nullptr."); } - StringValue* str_slot = reinterpret_cast(slot); - str_slot->len = src->length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { - return Status::InternalError("Allocate memcpy failed."); + StringRef* str_slot = reinterpret_cast(slot); + str_slot->size = src->length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { + return Status::InternalError("Allocate memory failed."); } - memcpy(str_slot->ptr, src->c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), src->c_str(), str_slot->size); return Status::OK(); } diff --git a/be/src/exec/schema_scanner/schema_schemata_scanner.cpp b/be/src/exec/schema_scanner/schema_schemata_scanner.cpp index 278e93ddb1..10427963f9 100644 --- a/be/src/exec/schema_scanner/schema_schemata_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_schemata_scanner.cpp @@ -19,24 +19,24 @@ #include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaSchemataScanner::_s_columns[] = { // name, type, size - {"CATALOG_NAME", TYPE_VARCHAR, sizeof(StringValue), true}, - {"SCHEMA_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"DEFAULT_CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"DEFAULT_COLLATION_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"SQL_PATH", TYPE_VARCHAR, sizeof(StringValue), true}, + {"CATALOG_NAME", TYPE_VARCHAR, sizeof(StringRef), true}, + {"SCHEMA_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"DEFAULT_CHARACTER_SET_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"DEFAULT_COLLATION_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"SQL_PATH", TYPE_VARCHAR, sizeof(StringRef), true}, }; SchemaSchemataScanner::SchemaSchemataScanner() : SchemaScanner(_s_columns, sizeof(_s_columns) / sizeof(SchemaScanner::ColumnDesc)), _db_index(0) {} -SchemaSchemataScanner::~SchemaSchemataScanner() {} +SchemaSchemataScanner::~SchemaSchemataScanner() = default; Status SchemaSchemataScanner::start(RuntimeState* state) { if (!_is_init) { @@ -80,43 +80,43 @@ Status SchemaSchemataScanner::fill_one_row(Tuple* tuple, MemPool* pool) { tuple->set_null(_tuple_desc->slots()[0]->null_indicator_offset()); } else { void* slot = tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); std::string catalog_name = _db_result.catalogs[_db_index]; - str_slot->ptr = (char*)pool->allocate(catalog_name.size()); - str_slot->len = catalog_name.size(); - memcpy(str_slot->ptr, catalog_name.c_str(), str_slot->len); + str_slot->data = (char*)pool->allocate(catalog_name.size()); + str_slot->size = catalog_name.size(); + memcpy(const_cast(str_slot->data), catalog_name.c_str(), str_slot->size); } } // schema { void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); std::string db_name = SchemaHelper::extract_db_name(_db_result.dbs[_db_index]); - str_slot->ptr = (char*)pool->allocate(db_name.size()); - str_slot->len = db_name.size(); - memcpy(str_slot->ptr, db_name.c_str(), str_slot->len); + str_slot->data = (char*)pool->allocate(db_name.size()); + str_slot->size = db_name.size(); + memcpy(const_cast(str_slot->data), db_name.c_str(), str_slot->size); } // DEFAULT_CHARACTER_SET_NAME { void* slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - str_slot->len = strlen("utf8") + 1; - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { + StringRef* str_slot = reinterpret_cast(slot); + str_slot->size = strlen("utf8") + 1; + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { return Status::InternalError("Allocate memory failed."); } - memcpy(str_slot->ptr, "utf8", str_slot->len); + memcpy(const_cast(str_slot->data), "utf8", str_slot->size); } // DEFAULT_COLLATION_NAME { void* slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - str_slot->len = strlen("utf8_general_ci") + 1; - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { + StringRef* str_slot = reinterpret_cast(slot); + str_slot->size = strlen("utf8_general_ci") + 1; + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { return Status::InternalError("Allocate memory failed."); } - memcpy(str_slot->ptr, "utf8_general_ci", str_slot->len); + memcpy(const_cast(str_slot->data), "utf8_general_ci", str_slot->size); } // SQL_PATH { tuple->set_null(_tuple_desc->slots()[4]->null_indicator_offset()); } diff --git a/be/src/exec/schema_scanner/schema_statistics_scanner.cpp b/be/src/exec/schema_scanner/schema_statistics_scanner.cpp index 51835ed22a..366765dda6 100644 --- a/be/src/exec/schema_scanner/schema_statistics_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_statistics_scanner.cpp @@ -18,27 +18,27 @@ #include "exec/schema_scanner/schema_statistics_scanner.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaStatisticsScanner::_s_cols_statistics[] = { // name, type, size, is_null - {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, - {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringRef), false}, + {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, {"NON_UNIQUE", TYPE_BIGINT, sizeof(int64_t), false}, - {"INDEX_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, - {"INDEX_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, + {"INDEX_SCHEMA", TYPE_VARCHAR, sizeof(StringRef), false}, + {"INDEX_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, {"SEQ_IN_INDEX", TYPE_BIGINT, sizeof(int64_t), false}, - {"COLUMN_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"COLLATION", TYPE_VARCHAR, sizeof(StringValue), true}, + {"COLUMN_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"COLLATION", TYPE_VARCHAR, sizeof(StringRef), true}, {"CARDINALITY", TYPE_BIGINT, sizeof(int64_t), true}, {"SUB_PART", TYPE_BIGINT, sizeof(int64_t), true}, - {"PACKED", TYPE_VARCHAR, sizeof(StringValue), true}, - {"NULLABLE", TYPE_VARCHAR, sizeof(StringValue), false}, - {"INDEX_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, - {"COMMENT", TYPE_VARCHAR, sizeof(StringValue), true}, + {"PACKED", TYPE_VARCHAR, sizeof(StringRef), true}, + {"NULLABLE", TYPE_VARCHAR, sizeof(StringRef), false}, + {"INDEX_TYPE", TYPE_VARCHAR, sizeof(StringRef), false}, + {"COMMENT", TYPE_VARCHAR, sizeof(StringRef), true}, }; SchemaStatisticsScanner::SchemaStatisticsScanner() diff --git a/be/src/exec/schema_scanner/schema_table_privileges_scanner.cpp b/be/src/exec/schema_scanner/schema_table_privileges_scanner.cpp index 372e721c28..1d572bc276 100644 --- a/be/src/exec/schema_scanner/schema_table_privileges_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_table_privileges_scanner.cpp @@ -19,18 +19,18 @@ #include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaTablePrivilegesScanner::_s_tbls_columns[] = { // name, type, size, is_null - {"GRANTEE", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, - {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"PRIVILEGE_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, - {"IS_GRANTABLE", TYPE_VARCHAR, sizeof(StringValue), true}, + {"GRANTEE", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringRef), false}, + {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"PRIVILEGE_TYPE", TYPE_VARCHAR, sizeof(StringRef), false}, + {"IS_GRANTABLE", TYPE_VARCHAR, sizeof(StringRef), true}, }; SchemaTablePrivilegesScanner::SchemaTablePrivilegesScanner() @@ -111,13 +111,13 @@ Status SchemaTablePrivilegesScanner::fill_one_col(const std::string* src, MemPoo if (nullptr == slot || nullptr == pool || nullptr == src) { return Status::InternalError("input pointer is nullptr."); } - StringValue* str_slot = reinterpret_cast(slot); - str_slot->len = src->length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { - return Status::InternalError("Allocate memcpy failed."); + StringRef* str_slot = reinterpret_cast(slot); + str_slot->size = src->length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { + return Status::InternalError("Allocate memory failed."); } - memcpy(str_slot->ptr, src->c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), src->c_str(), str_slot->size); return Status::OK(); } diff --git a/be/src/exec/schema_scanner/schema_tables_scanner.cpp b/be/src/exec/schema_scanner/schema_tables_scanner.cpp index 8bf05b894f..ee9febe3f2 100644 --- a/be/src/exec/schema_scanner/schema_tables_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_tables_scanner.cpp @@ -19,19 +19,19 @@ #include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaTablesScanner::_s_tbls_columns[] = { // name, type, size, is_null - {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, - {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"TABLE_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, - {"ENGINE", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringRef), false}, + {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"TABLE_TYPE", TYPE_VARCHAR, sizeof(StringRef), false}, + {"ENGINE", TYPE_VARCHAR, sizeof(StringRef), true}, {"VERSION", TYPE_BIGINT, sizeof(int64_t), true}, - {"ROW_FORMAT", TYPE_VARCHAR, sizeof(StringValue), true}, + {"ROW_FORMAT", TYPE_VARCHAR, sizeof(StringRef), true}, {"TABLE_ROWS", TYPE_BIGINT, sizeof(int64_t), true}, {"AVG_ROW_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, {"DATA_LENGTH", TYPE_BIGINT, sizeof(int64_t), true}, @@ -42,10 +42,10 @@ SchemaScanner::ColumnDesc SchemaTablesScanner::_s_tbls_columns[] = { {"CREATE_TIME", TYPE_DATETIME, sizeof(DateTimeValue), true}, {"UPDATE_TIME", TYPE_DATETIME, sizeof(DateTimeValue), true}, {"CHECK_TIME", TYPE_DATETIME, sizeof(DateTimeValue), true}, - {"TABLE_COLLATION", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_COLLATION", TYPE_VARCHAR, sizeof(StringRef), true}, {"CHECKSUM", TYPE_BIGINT, sizeof(int64_t), true}, - {"CREATE_OPTIONS", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLE_COMMENT", TYPE_VARCHAR, sizeof(StringValue), false}, + {"CREATE_OPTIONS", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLE_COMMENT", TYPE_VARCHAR, sizeof(StringRef), false}, }; SchemaTablesScanner::SchemaTablesScanner() @@ -97,57 +97,57 @@ Status SchemaTablesScanner::fill_one_row(Tuple* tuple, MemPool* pool) { tuple->set_null(_tuple_desc->slots()[0]->null_indicator_offset()); } else { void* slot = tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); std::string catalog_name = _db_result.catalogs[_db_index - 1]; - str_slot->ptr = (char*)pool->allocate(catalog_name.size()); - str_slot->len = catalog_name.size(); - memcpy(str_slot->ptr, catalog_name.c_str(), str_slot->len); + str_slot->data = (char*)pool->allocate(catalog_name.size()); + str_slot->size = catalog_name.size(); + memcpy(const_cast(str_slot->data), catalog_name.c_str(), str_slot->size); } } // schema { void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); std::string db_name = SchemaHelper::extract_db_name(_db_result.dbs[_db_index - 1]); - str_slot->ptr = (char*)pool->allocate(db_name.size()); - str_slot->len = db_name.size(); - memcpy(str_slot->ptr, db_name.c_str(), str_slot->len); + str_slot->data = (char*)pool->allocate(db_name.size()); + str_slot->size = db_name.size(); + memcpy(const_cast(str_slot->data), db_name.c_str(), str_slot->size); } // name { void* slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); const std::string* src = &tbl_status.name; - str_slot->len = src->length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { - return Status::InternalError("Allocate memcpy failed."); + str_slot->size = src->length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { + return Status::InternalError("Allocate memory failed."); } - memcpy(str_slot->ptr, src->c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), src->c_str(), str_slot->size); } // type { void* slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); const std::string* src = &tbl_status.type; - str_slot->len = src->length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { - return Status::InternalError("Allocate memcpy failed."); + str_slot->size = src->length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { + return Status::InternalError("Allocate memory failed."); } - memcpy(str_slot->ptr, src->c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), src->c_str(), str_slot->size); } // engine if (tbl_status.__isset.engine) { void* slot = tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); const std::string* src = &tbl_status.engine; - str_slot->len = src->length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { - return Status::InternalError("Allocate memcpy failed."); + str_slot->size = src->length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { + return Status::InternalError("Allocate memory failed."); } - memcpy(str_slot->ptr, src->c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), src->c_str(), str_slot->size); } else { tuple->set_null(_tuple_desc->slots()[4]->null_indicator_offset()); } @@ -222,14 +222,14 @@ Status SchemaTablesScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // collation if (tbl_status.__isset.collation) { void* slot = tuple->get_slot(_tuple_desc->slots()[17]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); const std::string* src = &tbl_status.collation; - str_slot->len = src->length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { - return Status::InternalError("Allocate memcpy failed."); + str_slot->size = src->length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { + return Status::InternalError("Allocate memory failed."); } - memcpy(str_slot->ptr, src->c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), src->c_str(), str_slot->size); } else { tuple->set_null(_tuple_desc->slots()[17]->null_indicator_offset()); } @@ -240,17 +240,17 @@ Status SchemaTablesScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // create_comment { void* slot = tuple->get_slot(_tuple_desc->slots()[20]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); const std::string* src = &tbl_status.comment; - str_slot->len = src->length(); - if (str_slot->len == 0) { - str_slot->ptr = nullptr; + str_slot->size = src->length(); + if (str_slot->size == 0) { + str_slot->data = nullptr; } else { - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { - return Status::InternalError("Allocate memcpy failed."); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { + return Status::InternalError("Allocate memory failed."); } - memcpy(str_slot->ptr, src->c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), src->c_str(), str_slot->size); } } _table_index++; diff --git a/be/src/exec/schema_scanner/schema_user_privileges_scanner.cpp b/be/src/exec/schema_scanner/schema_user_privileges_scanner.cpp index 533311319e..e2a1b86eb2 100644 --- a/be/src/exec/schema_scanner/schema_user_privileges_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_user_privileges_scanner.cpp @@ -19,16 +19,16 @@ #include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaUserPrivilegesScanner::_s_tbls_columns[] = { // name, type, size, is_null - {"GRANTEE", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, - {"PRIVILEGE_TYPE", TYPE_VARCHAR, sizeof(StringValue), false}, - {"IS_GRANTABLE", TYPE_VARCHAR, sizeof(StringValue), true}, + {"GRANTEE", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringRef), true}, + {"PRIVILEGE_TYPE", TYPE_VARCHAR, sizeof(StringRef), false}, + {"IS_GRANTABLE", TYPE_VARCHAR, sizeof(StringRef), true}, }; SchemaUserPrivilegesScanner::SchemaUserPrivilegesScanner() @@ -93,13 +93,13 @@ Status SchemaUserPrivilegesScanner::fill_one_col(const std::string* src, MemPool if (nullptr == slot || nullptr == pool || nullptr == src) { return Status::InternalError("input pointer is nullptr."); } - StringValue* str_slot = reinterpret_cast(slot); - str_slot->len = src->length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { - return Status::InternalError("Allocate memcpy failed."); + StringRef* str_slot = reinterpret_cast(slot); + str_slot->size = src->length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { + return Status::InternalError("Allocate memory failed."); } - memcpy(str_slot->ptr, src->c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), src->c_str(), str_slot->size); return Status::OK(); } diff --git a/be/src/exec/schema_scanner/schema_variables_scanner.cpp b/be/src/exec/schema_scanner/schema_variables_scanner.cpp index 69842d81f0..f550e8a0dd 100644 --- a/be/src/exec/schema_scanner/schema_variables_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_variables_scanner.cpp @@ -20,14 +20,14 @@ #include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" #include "runtime/runtime_state.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaVariablesScanner::_s_vars_columns[] = { // name, type, size - {"VARIABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"VARIABLE_VALUE", TYPE_VARCHAR, sizeof(StringValue), false}, + {"VARIABLE_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"VARIABLE_VALUE", TYPE_VARCHAR, sizeof(StringRef), false}, }; SchemaVariablesScanner::SchemaVariablesScanner(TVarType::type type) @@ -65,26 +65,26 @@ Status SchemaVariablesScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // variables names { void* slot = tuple->get_slot(_tuple_desc->slots()[0]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); int len = strlen(_begin->first.c_str()); - str_slot->ptr = (char*)pool->allocate(len + 1); - if (nullptr == str_slot->ptr) { + str_slot->data = (char*)pool->allocate(len + 1); + if (nullptr == str_slot->data) { return Status::InternalError("No Memory."); } - memcpy(str_slot->ptr, _begin->first.c_str(), len + 1); - str_slot->len = len; + memcpy(const_cast(str_slot->data), _begin->first.c_str(), len + 1); + str_slot->size = len; } // value { void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); int len = strlen(_begin->second.c_str()); - str_slot->ptr = (char*)pool->allocate(len + 1); - if (nullptr == str_slot->ptr) { + str_slot->data = (char*)pool->allocate(len + 1); + if (nullptr == str_slot->data) { return Status::InternalError("No Memory."); } - memcpy(str_slot->ptr, _begin->second.c_str(), len + 1); - str_slot->len = len; + memcpy(const_cast(str_slot->data), _begin->second.c_str(), len + 1); + str_slot->size = len; } ++_begin; return Status::OK(); diff --git a/be/src/exec/schema_scanner/schema_views_scanner.cpp b/be/src/exec/schema_scanner/schema_views_scanner.cpp index 4df4d1de31..af04de20df 100644 --- a/be/src/exec/schema_scanner/schema_views_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_views_scanner.cpp @@ -19,22 +19,22 @@ #include "exec/schema_scanner/schema_helper.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { SchemaScanner::ColumnDesc SchemaViewsScanner::_s_tbls_columns[] = { // name, type, size, is_null - {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringValue), true}, - {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringValue), false}, - {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringValue), false}, - {"VIEW_DEFINITION", TYPE_VARCHAR, sizeof(StringValue), true}, - {"CHECK_OPTION", TYPE_VARCHAR, sizeof(StringValue), true}, - {"IS_UPDATABLE", TYPE_VARCHAR, sizeof(StringValue), true}, - {"DEFINER", TYPE_VARCHAR, sizeof(StringValue), true}, - {"SECURITY_TYPE", TYPE_VARCHAR, sizeof(StringValue), true}, - {"CHARACTER_SET_CLIENT", TYPE_VARCHAR, sizeof(StringValue), true}, - {"COLLATION_CONNECTION", TYPE_VARCHAR, sizeof(StringValue), true}, + {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringRef), true}, + {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringRef), false}, + {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, + {"VIEW_DEFINITION", TYPE_VARCHAR, sizeof(StringRef), true}, + {"CHECK_OPTION", TYPE_VARCHAR, sizeof(StringRef), true}, + {"IS_UPDATABLE", TYPE_VARCHAR, sizeof(StringRef), true}, + {"DEFINER", TYPE_VARCHAR, sizeof(StringRef), true}, + {"SECURITY_TYPE", TYPE_VARCHAR, sizeof(StringRef), true}, + {"CHARACTER_SET_CLIENT", TYPE_VARCHAR, sizeof(StringRef), true}, + {"COLLATION_CONNECTION", TYPE_VARCHAR, sizeof(StringRef), true}, }; SchemaViewsScanner::SchemaViewsScanner() @@ -85,100 +85,100 @@ Status SchemaViewsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // schema { void* slot = tuple->get_slot(_tuple_desc->slots()[1]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); std::string db_name = SchemaHelper::extract_db_name(_db_result.dbs[_db_index - 1]); - str_slot->ptr = (char*)pool->allocate(db_name.size()); - str_slot->len = db_name.size(); - memcpy(str_slot->ptr, db_name.c_str(), str_slot->len); + str_slot->data = (char*)pool->allocate(db_name.size()); + str_slot->size = db_name.size(); + memcpy(const_cast(str_slot->data), db_name.c_str(), str_slot->size); } // name { void* slot = tuple->get_slot(_tuple_desc->slots()[2]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); const std::string* src = &tbl_status.name; - str_slot->len = src->length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { + str_slot->size = src->length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { return Status::InternalError("Allocate memcpy failed."); } - memcpy(str_slot->ptr, src->c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), src->c_str(), str_slot->size); } // definition { void* slot = tuple->get_slot(_tuple_desc->slots()[3]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); const std::string* ddl_sql = &tbl_status.ddl_sql; - str_slot->len = ddl_sql->length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { + str_slot->size = ddl_sql->length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { return Status::InternalError("Allocate memcpy failed."); } - memcpy(str_slot->ptr, ddl_sql->c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), ddl_sql->c_str(), str_slot->size); } // check_option { void* slot = tuple->get_slot(_tuple_desc->slots()[4]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); // This is from views in mysql const std::string check_option = "NONE"; - str_slot->len = check_option.length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { + str_slot->size = check_option.length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { return Status::InternalError("Allocate memcpy failed."); } - memcpy(str_slot->ptr, check_option.c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), check_option.c_str(), str_slot->size); } // is_updatable { void* slot = tuple->get_slot(_tuple_desc->slots()[5]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); // This is from views in mysql const std::string is_updatable = "NO"; - str_slot->len = is_updatable.length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { + str_slot->size = is_updatable.length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { return Status::InternalError("Allocate memcpy failed."); } - memcpy(str_slot->ptr, is_updatable.c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), is_updatable.c_str(), str_slot->size); } // definer { void* slot = tuple->get_slot(_tuple_desc->slots()[6]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); // This is from views in mysql const std::string definer = "root@%"; - str_slot->len = definer.length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { + str_slot->size = definer.length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { return Status::InternalError("Allocate memcpy failed."); } - memcpy(str_slot->ptr, definer.c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), definer.c_str(), str_slot->size); } // security_type { void* slot = tuple->get_slot(_tuple_desc->slots()[7]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); // This is from views in mysql const std::string security_type = "DEFINER"; - str_slot->len = security_type.length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { - return Status::InternalError("Allocate memcpy failed."); + str_slot->size = security_type.length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { + return Status::InternalError("Allocate memory failed."); } - memcpy(str_slot->ptr, security_type.c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), security_type.c_str(), str_slot->size); } // character_set_client { void* slot = tuple->get_slot(_tuple_desc->slots()[8]->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); + StringRef* str_slot = reinterpret_cast(slot); // This is from views in mysql const std::string encoding = "utf8"; - str_slot->len = encoding.length(); - str_slot->ptr = (char*)pool->allocate(str_slot->len); - if (nullptr == str_slot->ptr) { - return Status::InternalError("Allocate memcpy failed."); + str_slot->size = encoding.length(); + str_slot->data = (char*)pool->allocate(str_slot->size); + if (nullptr == str_slot->data) { + return Status::InternalError("Allocate memory failed."); } - memcpy(str_slot->ptr, encoding.c_str(), str_slot->len); + memcpy(const_cast(str_slot->data), encoding.c_str(), str_slot->size); } // collation_connection { tuple->set_null(_tuple_desc->slots()[9]->null_indicator_offset()); } diff --git a/be/src/exec/text_converter.cpp b/be/src/exec/text_converter.cpp index 5888eefc43..0839578098 100644 --- a/be/src/exec/text_converter.cpp +++ b/be/src/exec/text_converter.cpp @@ -22,12 +22,12 @@ #include "runtime/decimalv2_value.h" #include "runtime/descriptors.h" #include "runtime/mem_pool.h" -#include "runtime/string_value.h" #include "runtime/tuple.h" #include "util/string_parser.hpp" #include "util/types.h" #include "vec/columns/column_complex.h" #include "vec/columns/column_nullable.h" +#include "vec/common/string_ref.h" #include "vec/runtime/vdatetime_value.h" namespace doris { @@ -188,10 +188,10 @@ bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc, return true; } -void TextConverter::unescape_string(StringValue* value, MemPool* pool) { - char* new_data = reinterpret_cast(pool->allocate(value->len)); - unescape_string(value->ptr, new_data, &value->len); - value->ptr = new_data; +void TextConverter::unescape_string(StringRef* value, MemPool* pool) { + char* new_data = reinterpret_cast(pool->allocate(value->size)); + unescape_string(value->data, new_data, &value->size); + value->data = new_data; } void TextConverter::unescape_string(const char* src, char* dest, size_t* len) { diff --git a/be/src/exec/text_converter.h b/be/src/exec/text_converter.h index 79deac95ff..69c27e2c29 100644 --- a/be/src/exec/text_converter.h +++ b/be/src/exec/text_converter.h @@ -23,7 +23,7 @@ namespace doris { class MemPool; class SlotDescriptor; class Status; -struct StringValue; +struct StringRef; class Tuple; class TupleDescriptor; @@ -68,7 +68,7 @@ public: void unescape_string_on_spot(const char* src, size_t* len); // Removes escape characters from 'str', allocating a new string from pool. // 'str' is updated with the new ptr and length. - void unescape_string(StringValue* str, MemPool* pool); + void unescape_string(StringRef* str, MemPool* pool); private: char _escape_char; diff --git a/be/src/exec/text_converter.hpp b/be/src/exec/text_converter.hpp index 55aee22d0a..b6add9183f 100644 --- a/be/src/exec/text_converter.hpp +++ b/be/src/exec/text_converter.hpp @@ -25,7 +25,6 @@ #include "runtime/decimalv2_value.h" #include "runtime/descriptors.h" #include "runtime/mem_pool.h" -#include "runtime/string_value.h" #include "runtime/tuple.h" #include "text_converter.h" #include "util/binary_cast.hpp" @@ -43,7 +42,7 @@ inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, Tuple* tu const char* data, int len, bool copy_string, bool need_escape, MemPool* pool) { //Small batch import only \N is considered to be NULL, there is no replace_value function for batch import - if (true == slot_desc->is_nullable()) { + if (slot_desc->is_nullable()) { if (len == 2 && data[0] == '\\' && data[1] == 'N') { tuple->set_null(slot_desc->null_indicator_offset()); return true; @@ -61,20 +60,20 @@ inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, Tuple* tu case TYPE_VARCHAR: case TYPE_CHAR: case TYPE_STRING: { - StringValue* str_slot = reinterpret_cast(slot); - str_slot->ptr = const_cast(data); - str_slot->len = len; + StringRef* str_slot = reinterpret_cast(slot); + str_slot->data = const_cast(data); + str_slot->size = len; if (len != 0 && (copy_string || need_escape)) { - DCHECK(pool != NULL); + DCHECK(pool != nullptr); char* slot_data = reinterpret_cast(pool->allocate(len)); if (need_escape) { - unescape_string(data, slot_data, &str_slot->len); + unescape_string(data, slot_data, &str_slot->size); } else { - memcpy(slot_data, data, str_slot->len); + memcpy(slot_data, data, str_slot->size); } - str_slot->ptr = slot_data; + str_slot->data = slot_data; } break; diff --git a/be/src/exprs/agg_fn_evaluator.cpp b/be/src/exprs/agg_fn_evaluator.cpp index 5998c5c79f..e3a4b876da 100644 --- a/be/src/exprs/agg_fn_evaluator.cpp +++ b/be/src/exprs/agg_fn_evaluator.cpp @@ -309,8 +309,7 @@ inline void AggFnEvaluator::set_any_val(const void* slot, const TypeDescriptor& case TYPE_OBJECT: case TYPE_STRING: case TYPE_QUANTILE_STATE: - reinterpret_cast(slot)->to_string_val( - reinterpret_cast(dst)); + reinterpret_cast(slot)->to_string_val(reinterpret_cast(dst)); return; case TYPE_DATE: @@ -381,8 +380,7 @@ inline void AggFnEvaluator::set_output_slot(const AnyVal* src, const SlotDescrip case TYPE_OBJECT: case TYPE_QUANTILE_STATE: case TYPE_STRING: - *reinterpret_cast(slot) = - StringValue::from_string_val(*reinterpret_cast(src)); + *reinterpret_cast(slot) = StringRef(*reinterpret_cast(src)); return; case TYPE_DATE: @@ -484,11 +482,11 @@ bool AggFnEvaluator::count_distinct_data_filter(TupleRow* row, Tuple* dst) { _string_buffer.reset(new char[_string_buffer_len]); } - StringValue string_val(_string_buffer.get(), total_len); + StringRef string_ref(_string_buffer.get(), total_len); // the content of StringVal: // header: the STRING_VALUE's len // body: all input parameters' content - char* begin = string_val.ptr; + char* begin = const_cast(string_ref.data); for (int i = 0; i < vec_size; i++) { memcpy(begin, &vec_string_len[0], int_size); @@ -584,11 +582,11 @@ bool AggFnEvaluator::count_distinct_data_filter(TupleRow* row, Tuple* dst) { } } - DCHECK(begin == string_val.ptr + string_val.len) + DCHECK(begin == string_ref.data + string_ref.size) << "COUNT_DISTINCT: StringVal's len doesn't match"; bool is_add_buckets = false; - bool is_filter = is_in_hybridmap(&string_val, dst, &is_add_buckets); - update_mem_trackers(is_filter, is_add_buckets, string_val.len); + bool is_filter = is_in_hybridmap(&string_ref, dst, &is_add_buckets); + update_mem_trackers(is_filter, is_add_buckets, string_ref.size); return is_filter; } diff --git a/be/src/exprs/aggregate_functions.cpp b/be/src/exprs/aggregate_functions.cpp index 202dd1852b..b84bbec8fa 100644 --- a/be/src/exprs/aggregate_functions.cpp +++ b/be/src/exprs/aggregate_functions.cpp @@ -32,10 +32,10 @@ #include "olap/hll.h" #include "runtime/datetime_value.h" #include "runtime/decimalv2_value.h" -#include "runtime/string_value.h" #include "udf/udf_internal.h" #include "util/counts.h" #include "util/tdigest.h" +#include "vec/common/string_ref.h" // TODO: this file should be cross compiled and then all of the builtin // aggregate functions will have a codegen enabled path. Then we can remove @@ -703,7 +703,7 @@ void AggregateFunctions::min(FunctionContext* ctx, const StringVal& src, StringV return; } - if (dst->is_null || StringValue::from_string_val(src) < StringValue::from_string_val(*dst)) { + if (dst->is_null || StringRef(src) < StringRef(*dst)) { if (!dst->is_null) { ctx->free(dst->ptr); } @@ -719,7 +719,7 @@ void AggregateFunctions::max(FunctionContext* ctx, const StringVal& src, StringV return; } - if (dst->is_null || StringValue::from_string_val(src) > StringValue::from_string_val(*dst)) { + if (dst->is_null || StringRef(src) > StringRef(*dst)) { if (!dst->is_null) { ctx->free(dst->ptr); } @@ -1369,15 +1369,15 @@ public: static void destroy(const StringVal& dst) { delete (MultiDistinctStringCountState*)dst.ptr; } - void update(StringValue* sv) { _set.insert(sv); } + void update(StringRef* sv) { _set.insert(sv); } StringVal serialize(FunctionContext* ctx) { // calculate total serialize buffer length int total_serialized_set_length = 1; HybridSetBase::IteratorBase* iterator = _set.begin(); while (iterator->has_next()) { - const StringValue* value = reinterpret_cast(iterator->get_value()); - total_serialized_set_length += STRING_LENGTH_RECORD_LENGTH + value->len; + const StringRef* value = reinterpret_cast(iterator->get_value()); + total_serialized_set_length += STRING_LENGTH_RECORD_LENGTH + value->size; iterator->next(); } StringVal result(ctx, total_serialized_set_length); @@ -1387,14 +1387,14 @@ public: writer++; iterator = _set.begin(); while (iterator->has_next()) { - const StringValue* value = reinterpret_cast(iterator->get_value()); + const StringRef* value = reinterpret_cast(iterator->get_value()); // length, it is unnecessary to consider little or big endian for // all running in little-endian. - *(int*)writer = value->len; + *(int*)writer = value->size; writer += STRING_LENGTH_RECORD_LENGTH; // value - memcpy(writer, value->ptr, value->len); - writer += value->len; + memcpy(writer, value->data, value->size); + writer += value->size; iterator->next(); } return result; @@ -1410,7 +1410,7 @@ public: while (reader < end) { const int length = *(int*)reader; reader += STRING_LENGTH_RECORD_LENGTH; - StringValue value((char*)reader, length); + StringRef value((char*)reader, length); _set.insert(&value); reader += length; } @@ -1626,7 +1626,7 @@ void AggregateFunctions::count_distinct_string_update(FunctionContext* ctx, Stri if (src.is_null) return; MultiDistinctStringCountState* state = reinterpret_cast(dst->ptr); - StringValue sv = StringValue::from_string_val(src); + StringRef sv = StringRef(src); state->update(&sv); } diff --git a/be/src/exprs/anyval_util.h b/be/src/exprs/anyval_util.h index 007f95f0eb..69144524c5 100644 --- a/be/src/exprs/anyval_util.h +++ b/be/src/exprs/anyval_util.h @@ -202,11 +202,11 @@ public: template static Val max_val(FunctionContext* ctx) { if constexpr (std::is_same_v) { - StringValue sv = type_limit::max(); + StringRef sv = type_limit::max(); StringVal max_val; - max_val.ptr = ctx->allocate(sv.len); - memcpy(max_val.ptr, sv.ptr, sv.len); - max_val.len = sv.len; + max_val.ptr = ctx->allocate(sv.size); + memcpy(max_val.ptr, sv.data, sv.size); + max_val.len = sv.size; return max_val; } else if constexpr (std::is_same_v) { @@ -416,8 +416,8 @@ public: case TYPE_OBJECT: case TYPE_QUANTILE_STATE: case TYPE_STRING: - reinterpret_cast(slot)->to_string_val( - reinterpret_cast(dst)); + reinterpret_cast(slot)->to_string_val( + reinterpret_cast(dst)); return; case TYPE_DECIMALV2: reinterpret_cast(dst)->val = @@ -502,8 +502,8 @@ template <> inline bool AnyValUtil::equals_internal(const StringVal& x, const StringVal& y) { DCHECK(!x.is_null); DCHECK(!y.is_null); - StringValue x_sv = StringValue::from_string_val(x); - StringValue y_sv = StringValue::from_string_val(y); + StringRef x_sv = StringRef(x); + StringRef y_sv = StringRef(y); return x_sv == y_sv; } diff --git a/be/src/exprs/binary_predicate.cpp b/be/src/exprs/binary_predicate.cpp index 6fa54942de..7408c3307a 100644 --- a/be/src/exprs/binary_predicate.cpp +++ b/be/src/exprs/binary_predicate.cpp @@ -21,7 +21,7 @@ #include "gen_cpp/Exprs_types.h" #include "runtime/decimalv2_value.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { @@ -396,8 +396,8 @@ DATETIME_BINARY_PRED_FNS() if (v2.is_null) { \ return BooleanVal::null(); \ } \ - StringValue pv1 = StringValue::from_string_val(v1); \ - StringValue pv2 = StringValue::from_string_val(v2); \ + StringRef pv1 = StringRef(v1); \ + StringRef pv2 = StringRef(v2); \ return BooleanVal(pv1 OP pv2); \ } diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp index 2b40bd7322..9438b2a39e 100644 --- a/be/src/exprs/bitmap_function.cpp +++ b/be/src/exprs/bitmap_function.cpp @@ -260,7 +260,7 @@ StringVal BitmapFunctions::bitmap_serialize(FunctionContext* ctx, const StringVa return result; } -// This is a init function for intersect_count not for bitmap_intersect, not for _orthogonal_bitmap_intersect(bitmap,t,t) +// This is an init function for intersect_count not for bitmap_intersect, not for _orthogonal_bitmap_intersect(bitmap,t,t) template void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, StringVal* dst) { dst->is_null = false; @@ -277,7 +277,7 @@ void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, StringVal* dst dst->ptr = (uint8_t*)intersect; } -// This is a update function for intersect_count/ORTHOGONAL_BITMAP_INTERSECT_COUNT/ORTHOGONAL_BITMAP_INTERSECT(bitmap,t,t) +// This is an update function for intersect_count/ORTHOGONAL_BITMAP_INTERSECT_COUNT/ORTHOGONAL_BITMAP_INTERSECT(bitmap,t,t) // not for bitmap_intersect(Bitmap) template void BitmapFunctions::bitmap_intersect_update(FunctionContext* ctx, const StringVal& src, @@ -956,8 +956,8 @@ template void BitmapFunctions::bitmap_intersect_init FunctionContext* ctx, StringVal* dst); template void BitmapFunctions::bitmap_intersect_init( FunctionContext* ctx, StringVal* dst); -template void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, - StringVal* dst); +template void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, + StringVal* dst); template void BitmapFunctions::bitmap_intersect_update( FunctionContext* ctx, const StringVal& src, const TinyIntVal& key, int num_key, @@ -986,7 +986,7 @@ template void BitmapFunctions::bitmap_intersect_update( FunctionContext* ctx, const StringVal& src, const DecimalV2Val& key, int num_key, const DecimalV2Val* keys, const StringVal* dst); -template void BitmapFunctions::bitmap_intersect_update( +template void BitmapFunctions::bitmap_intersect_update( FunctionContext* ctx, const StringVal& src, const StringVal& key, int num_key, const StringVal* keys, const StringVal* dst); @@ -1017,9 +1017,9 @@ template void BitmapFunctions::bitmap_intersect_merge(FunctionCon template void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, const StringVal& src, const StringVal* dst); -template void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, - const StringVal& src, - const StringVal* dst); +template void BitmapFunctions::bitmap_intersect_merge(FunctionContext* ctx, + const StringVal& src, + const StringVal* dst); template StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, const StringVal& src); @@ -1039,8 +1039,8 @@ template StringVal BitmapFunctions::bitmap_intersect_serialize(Fu const StringVal& src); template StringVal BitmapFunctions::bitmap_intersect_serialize( FunctionContext* ctx, const StringVal& src); -template StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, - const StringVal& src); +template StringVal BitmapFunctions::bitmap_intersect_serialize(FunctionContext* ctx, + const StringVal& src); template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, const StringVal& src); @@ -1060,8 +1060,8 @@ template BigIntVal BitmapFunctions::bitmap_intersect_finalize(Fun const StringVal& src); template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, const StringVal& src); -template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, - const StringVal& src); +template BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, + const StringVal& src); template void BitmapFunctions::orthogonal_bitmap_intersect_count_init( FunctionContext* ctx, StringVal* dst); @@ -1075,7 +1075,7 @@ template void BitmapFunctions::orthogonal_bitmap_intersect_count_init( FunctionContext* ctx, StringVal* dst); -template void BitmapFunctions::orthogonal_bitmap_intersect_count_init( +template void BitmapFunctions::orthogonal_bitmap_intersect_count_init( FunctionContext* ctx, StringVal* dst); template void BitmapFunctions::orthogonal_bitmap_intersect_init( @@ -1090,7 +1090,7 @@ template void BitmapFunctions::orthogonal_bitmap_intersect_init FunctionContext* ctx, StringVal* dst); template void BitmapFunctions::orthogonal_bitmap_intersect_init( FunctionContext* ctx, StringVal* dst); -template void BitmapFunctions::orthogonal_bitmap_intersect_init( +template void BitmapFunctions::orthogonal_bitmap_intersect_init( FunctionContext* ctx, StringVal* dst); template StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize( @@ -1105,7 +1105,7 @@ template StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize FunctionContext* ctx, const StringVal& src); template StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize( FunctionContext* ctx, const StringVal& src); -template StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize( +template StringVal BitmapFunctions::orthogonal_bitmap_intersect_serialize( FunctionContext* ctx, const StringVal& src); template BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize( @@ -1120,7 +1120,7 @@ template BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize( FunctionContext* ctx, const StringVal& src); template BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize( FunctionContext* ctx, const StringVal& src); -template BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize( +template BigIntVal BitmapFunctions::orthogonal_bitmap_intersect_finalize( FunctionContext* ctx, const StringVal& src); template StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize( @@ -1135,6 +1135,6 @@ template StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize< FunctionContext* ctx, const StringVal& src); template StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize( FunctionContext* ctx, const StringVal& src); -template StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize( +template StringVal BitmapFunctions::orthogonal_bitmap_intersect_count_serialize( FunctionContext* ctx, const StringVal& src); } // namespace doris diff --git a/be/src/exprs/bloomfilter_predicate.h b/be/src/exprs/bloomfilter_predicate.h index ec3b24434e..bf77a8fb3d 100644 --- a/be/src/exprs/bloomfilter_predicate.h +++ b/be/src/exprs/bloomfilter_predicate.h @@ -294,17 +294,17 @@ struct StringFindOp { } void insert(BloomFilterAdaptor& bloom_filter, const void* data) const { - const auto* value = reinterpret_cast(data); + const auto* value = reinterpret_cast(data); if (value) { - bloom_filter.add_bytes(value->ptr, value->len); + bloom_filter.add_bytes(value->data, value->size); } } bool find(const BloomFilterAdaptor& bloom_filter, const void* data) const { - const auto* value = reinterpret_cast(data); + const auto* value = reinterpret_cast(data); if (value == nullptr) { return false; } - return bloom_filter.test(Slice(value->ptr, value->len)); + return bloom_filter.test(Slice(value->data, value->size)); } bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const { return StringFindOp::find(bloom_filter, data); @@ -318,13 +318,13 @@ struct StringFindOp { // when filer used by the storage engine struct FixedStringFindOp : public StringFindOp { bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* input_data) const { - const auto* value = reinterpret_cast(input_data); - int64_t size = value->len; - char* data = value->ptr; + const auto* value = reinterpret_cast(input_data); + int64_t size = value->size; + const char* data = value->data; while (size > 0 && data[size - 1] == '\0') { size--; } - return bloom_filter.test(Slice(value->ptr, size)); + return bloom_filter.test(Slice(value->data, size)); } }; diff --git a/be/src/exprs/create_predicate_function.h b/be/src/exprs/create_predicate_function.h index a9418a6955..3178b7a970 100644 --- a/be/src/exprs/create_predicate_function.h +++ b/be/src/exprs/create_predicate_function.h @@ -43,8 +43,8 @@ public: template static BasePtr get_function() { using CppType = typename PrimitiveTypeTraits::CppType; - using Set = std::conditional_t, StringSet, - HybridSet>; + using Set = + std::conditional_t, StringSet, HybridSet>; return new Set(); }; }; diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h index 8588f37429..403021e1ce 100644 --- a/be/src/exprs/expr.h +++ b/be/src/exprs/expr.h @@ -29,12 +29,11 @@ #include "gen_cpp/Opcodes_types.h" #include "runtime/descriptors.h" #include "runtime/large_int_value.h" -#include "runtime/string_value.h" -#include "runtime/string_value.hpp" #include "runtime/tuple.h" #include "runtime/tuple_row.h" #include "udf/udf.h" #include "util/string_parser.hpp" +#include "vec/common/string_ref.h" #include "vec/data_types/data_type_decimal.h" #include "vec/io/io_helper.h" @@ -585,7 +584,7 @@ Status create_texpr_literal_node(const void* data, TExprNode* node, int precisio (*node).__set_float_literal(float_literal); (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DOUBLE)); } else if constexpr ((T == TYPE_STRING) || (T == TYPE_CHAR) || (T == TYPE_VARCHAR)) { - auto origin_value = reinterpret_cast(data); + auto origin_value = reinterpret_cast(data); (*node).__set_node_type(TExprNodeType::STRING_LITERAL); TStringLiteral string_literal; string_literal.__set_value(origin_value->to_string()); diff --git a/be/src/exprs/expr_context.cpp b/be/src/exprs/expr_context.cpp index b019de99c0..99aec19935 100644 --- a/be/src/exprs/expr_context.cpp +++ b/be/src/exprs/expr_context.cpp @@ -242,8 +242,8 @@ void* ExprContext::get_value(Expr* e, TupleRow* row, int precision, int scale) { if (v.is_null) { return nullptr; } - _result.string_val.ptr = reinterpret_cast(v.ptr); - _result.string_val.len = v.len; + _result.string_val.data = reinterpret_cast(v.ptr); + _result.string_val.size = v.len; return &_result.string_val; } case TYPE_DATE: diff --git a/be/src/exprs/expr_value.h b/be/src/exprs/expr_value.h index d38dadd45e..a3cc441675 100644 --- a/be/src/exprs/expr_value.h +++ b/be/src/exprs/expr_value.h @@ -23,8 +23,8 @@ #include "runtime/collection_value.h" #include "runtime/datetime_value.h" #include "runtime/decimalv2_value.h" -#include "runtime/string_value.h" #include "runtime/types.h" +#include "vec/common/string_ref.h" namespace doris { @@ -43,7 +43,7 @@ struct ExprValue { float float_val; double double_val; std::string string_data; - StringValue string_val; + StringRef string_val; DateTimeValue datetime_val; doris::vectorized::DateV2Value datev2_val; doris::vectorized::DateV2Value datetimev2_val; @@ -76,13 +76,11 @@ struct ExprValue { ExprValue(int64_t i, int32_t f) : decimalv2_val(i, f) {} // c'tor for string values - ExprValue(const std::string& str) - : string_data(str), - string_val(const_cast(string_data.data()), string_data.size()) {} + ExprValue(const std::string& str) : string_data(str), string_val(string_data) {} // Set string value to copy of str - void set_string_val(const StringValue& str) { - string_data = std::string(str.ptr, str.len); + void set_string_val(const StringRef& str) { + string_data = std::string(str.data, str.size); sync_string_val(); } @@ -95,8 +93,8 @@ struct ExprValue { // string_data. If not called after mutating string_data, // string_val->ptr may point at garbage. void sync_string_val() { - string_val.ptr = const_cast(string_data.data()); - string_val.len = string_data.size(); + string_val.data = string_data.data(); + string_val.size = string_data.size(); } // Sets the value for type to '0' and returns a pointer to the data diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h index a2ca4553da..25606b226e 100644 --- a/be/src/exprs/hybrid_set.h +++ b/be/src/exprs/hybrid_set.h @@ -23,7 +23,7 @@ #include "runtime/decimalv2_value.h" #include "runtime/define_primitive_type.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { @@ -159,8 +159,8 @@ public: return; } - const auto* value = reinterpret_cast(data); - std::string str_value(value->ptr, value->len); + const auto* value = reinterpret_cast(data); + std::string str_value(value->data, value->size); _set.insert(str_value); } @@ -185,8 +185,8 @@ public: return false; } - auto* value = reinterpret_cast(data); - std::string_view str_value(const_cast(value->ptr), value->len); + auto* value = reinterpret_cast(data); + std::string_view str_value(const_cast(value->data), value->size); auto it = _set.find(str_value); return !(it == _set.end()); @@ -206,8 +206,8 @@ public: ~Iterator() override = default; bool has_next() const override { return !(_begin == _end); } const void* get_value() override { - _value.ptr = const_cast(_begin->data()); - _value.len = _begin->length(); + _value.data = const_cast(_begin->data()); + _value.size = _begin->length(); return &_value; } void next() override { ++_begin; } @@ -215,7 +215,7 @@ public: private: typename phmap::flat_hash_set::iterator _begin; typename phmap::flat_hash_set::iterator _end; - StringValue _value; + StringRef _value; }; IteratorBase* begin() override { @@ -230,8 +230,8 @@ private: }; // note: Two difference from StringSet -// 1 StringValue has better comparison performance than std::string -// 2 std::string keeps its own memory, bug StringValue just keeps ptr and len, so you the caller should manage memory of StringValue +// 1 StringRef has better comparison performance than std::string +// 2 std::string keeps its own memory, bug StringRef just keeps ptr and len, so you the caller should manage memory of StringRef class StringValueSet : public HybridSetBase { public: StringValueSet() = default; @@ -243,13 +243,13 @@ public: return; } - const auto* value = reinterpret_cast(data); - StringValue sv(value->ptr, value->len); + const auto* value = reinterpret_cast(data); + StringRef sv(value->data, value->size); _set.insert(sv); } void insert(void* data, size_t size) override { - StringValue sv(reinterpret_cast(data), size); + StringRef sv(reinterpret_cast(data), size); _set.insert(sv); } @@ -269,7 +269,7 @@ public: return false; } - auto* value = reinterpret_cast(data); + auto* value = reinterpret_cast(data); auto it = _set.find(*value); return !(it == _set.end()); @@ -280,39 +280,39 @@ public: return false; } - StringValue sv(reinterpret_cast(data), size); + StringRef sv(reinterpret_cast(data), size); auto it = _set.find(sv); return !(it == _set.end()); } class Iterator : public IteratorBase { public: - Iterator(phmap::flat_hash_set::iterator begin, - phmap::flat_hash_set::iterator end) + Iterator(phmap::flat_hash_set::iterator begin, + phmap::flat_hash_set::iterator end) : _begin(begin), _end(end) {} ~Iterator() override = default; bool has_next() const override { return !(_begin == _end); } const void* get_value() override { - _value.ptr = const_cast(_begin->ptr); - _value.len = _begin->len; + _value.data = const_cast(_begin->data); + _value.size = _begin->size; return &_value; } void next() override { ++_begin; } private: - typename phmap::flat_hash_set::iterator _begin; - typename phmap::flat_hash_set::iterator _end; - StringValue _value; + typename phmap::flat_hash_set::iterator _begin; + typename phmap::flat_hash_set::iterator _end; + StringRef _value; }; IteratorBase* begin() override { return _pool.add(new (std::nothrow) Iterator(_set.begin(), _set.end())); } - phmap::flat_hash_set* get_inner_set() { return &_set; } + phmap::flat_hash_set* get_inner_set() { return &_set; } private: - phmap::flat_hash_set _set; + phmap::flat_hash_set _set; ObjectPool _pool; }; diff --git a/be/src/exprs/info_func.cpp b/be/src/exprs/info_func.cpp index 1418b04acd..2277f1cc1f 100644 --- a/be/src/exprs/info_func.cpp +++ b/be/src/exprs/info_func.cpp @@ -25,11 +25,7 @@ InfoFunc::InfoFunc(const TExprNode& node) : Expr(node), _int_value(node.info_func.int_value), _str_value(node.info_func.str_value) {} StringVal InfoFunc::get_string_val(ExprContext* context, TupleRow*) { - StringVal val; - StringValue value(_str_value); - value.to_string_val(&val); - - return val; + return {_str_value.c_str(), static_cast(_str_value.size())}; } BigIntVal InfoFunc::get_big_int_val(ExprContext* context, TupleRow*) { diff --git a/be/src/exprs/like_predicate.cpp b/be/src/exprs/like_predicate.cpp index cd4fa586c4..9ea70ffa6e 100644 --- a/be/src/exprs/like_predicate.cpp +++ b/be/src/exprs/like_predicate.cpp @@ -63,8 +63,8 @@ void LikePredicate::like_prepare(FunctionContext* context, if (pattern_val.is_null) { return; } - StringValue pattern = StringValue::from_string_val(pattern_val); - std::string pattern_str(pattern.ptr, pattern.len); + StringRef pattern = StringRef(pattern_val); + std::string pattern_str(pattern.data, pattern.size); std::string search_string; if (RE2::FullMatch(pattern_str, LIKE_ENDS_WITH_RE, &search_string)) { remove_escape_character(&search_string); @@ -268,10 +268,10 @@ BooleanVal LikePredicate::constant_substring_fn(FunctionContext* context, const } LikePredicateState* state = reinterpret_cast( context->get_function_state(FunctionContext::THREAD_LOCAL)); - if (state->search_string_sv.len == 0) { + if (state->search_string_sv.size == 0) { return BooleanVal(true); } - StringValue pattern_value = StringValue::from_string_val(val); + StringRef pattern_value = StringRef(val); return BooleanVal(state->substring_pattern.search(&pattern_value) != -1); } @@ -282,10 +282,10 @@ BooleanVal LikePredicate::constant_starts_with_fn(FunctionContext* context, cons } LikePredicateState* state = reinterpret_cast( context->get_function_state(FunctionContext::THREAD_LOCAL)); - if (val.len < state->search_string_sv.len) { + if (val.len < state->search_string_sv.size) { return BooleanVal(false); } else { - StringValue v = StringValue(reinterpret_cast(val.ptr), state->search_string_sv.len); + StringRef v = StringRef(reinterpret_cast(val.ptr), state->search_string_sv.size); return BooleanVal(state->search_string_sv.eq((v))); } } @@ -297,12 +297,12 @@ BooleanVal LikePredicate::constant_ends_with_fn(FunctionContext* context, const } LikePredicateState* state = reinterpret_cast( context->get_function_state(FunctionContext::THREAD_LOCAL)); - if (val.len < state->search_string_sv.len) { + if (val.len < state->search_string_sv.size) { return BooleanVal(false); } else { - char* ptr = reinterpret_cast(val.ptr) + val.len - state->search_string_sv.len; - int len = state->search_string_sv.len; - StringValue v = StringValue(ptr, len); + char* ptr = reinterpret_cast(val.ptr) + val.len - state->search_string_sv.size; + int len = state->search_string_sv.size; + StringRef v = StringRef(ptr, len); return BooleanVal(state->search_string_sv.eq(v)); } } @@ -314,7 +314,7 @@ BooleanVal LikePredicate::constant_equals_fn(FunctionContext* context, const Str } LikePredicateState* state = reinterpret_cast( context->get_function_state(FunctionContext::THREAD_LOCAL)); - return BooleanVal(state->search_string_sv.eq(StringValue::from_string_val(val))); + return BooleanVal(state->search_string_sv.eq(StringRef(val))); } BooleanVal LikePredicate::constant_regex_fn_partial(FunctionContext* context, const StringVal& val, diff --git a/be/src/exprs/like_predicate.h b/be/src/exprs/like_predicate.h index c2e4671d89..f9edeb1dd7 100644 --- a/be/src/exprs/like_predicate.h +++ b/be/src/exprs/like_predicate.h @@ -44,7 +44,7 @@ struct LikePredicateState { /// separately. LikePredicateFunction function; - /// Holds the string the StringValue points to and is set any time StringValue is + /// Holds the string the StringRef points to and is set any time StringRef is /// used. std::string search_string; @@ -52,7 +52,7 @@ struct LikePredicateState { /// constant string or has a constant string at the beginning or end of the pattern. /// This will be set in order to check for that pattern in the corresponding part of /// the string. - StringValue search_string_sv; + StringRef search_string_sv; /// Used for LIKE predicates if the pattern is a constant argument and has a constant /// string in the middle of it. This will be use in order to check for the substring @@ -66,7 +66,7 @@ struct LikePredicateState { void set_search_string(const std::string& search_string_arg) { search_string = search_string_arg; - search_string_sv = StringValue(search_string); + search_string_sv = StringRef(search_string); substring_pattern.set_pattern(&search_string_sv); } }; diff --git a/be/src/exprs/literal.cpp b/be/src/exprs/literal.cpp index 28211abc2a..15ff1be5fe 100644 --- a/be/src/exprs/literal.cpp +++ b/be/src/exprs/literal.cpp @@ -162,7 +162,7 @@ Decimal32Val Literal::get_decimal32_val(ExprContext* context, TupleRow* row) { DCHECK(_type.type == TYPE_DECIMAL32) << _type; StringParser::ParseResult result; auto decimal32_value = StringParser::string_to_decimal( - _value.string_val.ptr, _value.string_val.len, _type.precision, _type.scale, &result); + _value.string_val.data, _value.string_val.size, _type.precision, _type.scale, &result); if (result == StringParser::ParseResult::PARSE_SUCCESS) { return Decimal32Val(decimal32_value); } else { @@ -174,7 +174,7 @@ Decimal64Val Literal::get_decimal64_val(ExprContext* context, TupleRow* row) { DCHECK(_type.type == TYPE_DECIMAL64) << _type; StringParser::ParseResult result; auto decimal_value = StringParser::string_to_decimal( - _value.string_val.ptr, _value.string_val.len, _type.precision, _type.scale, &result); + _value.string_val.data, _value.string_val.size, _type.precision, _type.scale, &result); if (result == StringParser::ParseResult::PARSE_SUCCESS) { return Decimal64Val(decimal_value); } else { @@ -186,7 +186,7 @@ Decimal128Val Literal::get_decimal128_val(ExprContext* context, TupleRow* row) { DCHECK(_type.type == TYPE_DECIMAL128I) << _type; StringParser::ParseResult result; auto decimal_value = StringParser::string_to_decimal( - _value.string_val.ptr, _value.string_val.len, _type.precision, _type.scale, &result); + _value.string_val.data, _value.string_val.size, _type.precision, _type.scale, &result); if (result == StringParser::ParseResult::PARSE_SUCCESS) { return Decimal128Val(decimal_value); } else { diff --git a/be/src/exprs/match_predicate.cpp b/be/src/exprs/match_predicate.cpp index 972a1d6728..2eaaeb929c 100644 --- a/be/src/exprs/match_predicate.cpp +++ b/be/src/exprs/match_predicate.cpp @@ -25,7 +25,7 @@ #include "exec/olap_utils.h" #include "exprs/string_functions.h" #include "olap/schema.h" -#include "runtime/string_value.hpp" +#include "vec/common/string_ref.h" namespace doris { @@ -49,10 +49,10 @@ Status MatchPredicate::evaluate(const Schema& schema, InvertedIndexIterator* ite if (is_string_type(column_desc->type()) || (column_desc->type() == OLAP_FIELD_TYPE_ARRAY && is_string_type(column_desc->get_sub_field(0)->type_info()->type()))) { - StringValue match_value; + StringRef match_value; int32_t length = _value.length(); char* buffer = const_cast(_value.c_str()); - match_value.replace(buffer, length); + match_value.replace(buffer, length); //is it safe? s = iterator->read_from_inverted_index(column_desc->name(), &match_value, inverted_index_query_type, num_rows, &roaring); } else if (column_desc->type() == OLAP_FIELD_TYPE_ARRAY && diff --git a/be/src/exprs/math_functions.cpp b/be/src/exprs/math_functions.cpp index bc5507eea3..6eacf5fe6e 100644 --- a/be/src/exprs/math_functions.cpp +++ b/be/src/exprs/math_functions.cpp @@ -680,7 +680,7 @@ LEAST_FNS(); } #define LEAST_NONNUMERIC_FNS() \ - LEAST_NONNUMERIC_FN(string_val, StringVal, StringValue); \ + LEAST_NONNUMERIC_FN(string_val, StringVal, StringRef); \ LEAST_NONNUMERIC_FN(datetime_val, DateTimeVal, DateTimeValue); \ LEAST_NONNUMERIC_FN(decimal_val, DecimalV2Val, DecimalV2Value); @@ -723,7 +723,7 @@ GREATEST_FNS(); } #define GREATEST_NONNUMERIC_FNS() \ - GREATEST_NONNUMERIC_FN(string_val, StringVal, StringValue); \ + GREATEST_NONNUMERIC_FN(string_val, StringVal, StringRef); \ GREATEST_NONNUMERIC_FN(datetime_val, DateTimeVal, DateTimeValue); \ GREATEST_NONNUMERIC_FN(decimal_val, DecimalV2Val, DecimalV2Value); diff --git a/be/src/exprs/minmax_predicate.h b/be/src/exprs/minmax_predicate.h index a237f7f8b2..4394c36d69 100644 --- a/be/src/exprs/minmax_predicate.h +++ b/be/src/exprs/minmax_predicate.h @@ -94,20 +94,20 @@ public: } Status merge(MinMaxFuncBase* minmax_func, ObjectPool* pool) override { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { MinMaxNumFunc* other_minmax = static_cast*>(minmax_func); if (other_minmax->_min < _min) { auto& other_min = other_minmax->_min; - auto str = pool->add(new std::string(other_min.ptr, other_min.len)); - _min.ptr = str->data(); - _min.len = str->length(); + auto str = pool->add(new std::string(other_min.data, other_min.size)); + _min.data = str->data(); + _min.size = str->length(); } if (other_minmax->_max > _max) { auto& other_max = other_minmax->_max; - auto str = pool->add(new std::string(other_max.ptr, other_max.len)); - _max.ptr = str->data(); - _max.len = str->length(); + auto str = pool->add(new std::string(other_max.data, other_max.size)); + _max.data = str->data(); + _max.size = str->length(); } } else { MinMaxNumFunc* other_minmax = static_cast*>(minmax_func); diff --git a/be/src/exprs/new_agg_fn_evaluator.cc b/be/src/exprs/new_agg_fn_evaluator.cc index 3ce6375dde..f795522009 100644 --- a/be/src/exprs/new_agg_fn_evaluator.cc +++ b/be/src/exprs/new_agg_fn_evaluator.cc @@ -30,8 +30,8 @@ #include "exprs/expr_context.h" #include "runtime/raw_value.h" #include "runtime/runtime_state.h" -#include "runtime/string_value.h" #include "udf/udf_internal.h" +#include "vec/common/string_ref.h" using namespace doris; using namespace doris_udf; @@ -255,9 +255,9 @@ void NewAggFnEvaluator::SetDstSlot(const AnyVal* src, const SlotDescriptor& dst_ case TYPE_OBJECT: case TYPE_QUANTILE_STATE: case TYPE_STRING: - *reinterpret_cast(slot) = - StringValue::from_string_val(*reinterpret_cast(src)); + *reinterpret_cast(slot) = *reinterpret_cast(src); return; + case TYPE_DATE: case TYPE_DATETIME: *reinterpret_cast(slot) = @@ -267,6 +267,7 @@ void NewAggFnEvaluator::SetDstSlot(const AnyVal* src, const SlotDescriptor& dst_ case TYPE_DECIMALV2: *reinterpret_cast(slot) = reinterpret_cast(src)->val; return; + default: DCHECK(false) << "NYI: " << dst_slot_desc.type(); } @@ -353,8 +354,7 @@ inline void NewAggFnEvaluator::set_any_val(const void* slot, const TypeDescripto case TYPE_OBJECT: case TYPE_QUANTILE_STATE: case TYPE_STRING: - reinterpret_cast(slot)->to_string_val( - reinterpret_cast(dst)); + reinterpret_cast(slot)->to_string_val(reinterpret_cast(dst)); return; case TYPE_DATE: diff --git a/be/src/exprs/new_in_predicate.cpp b/be/src/exprs/new_in_predicate.cpp index 04d739ee5f..db88cfcd24 100644 --- a/be/src/exprs/new_in_predicate.cpp +++ b/be/src/exprs/new_in_predicate.cpp @@ -33,9 +33,9 @@ SetType get_val(const FunctionContext::TypeDesc* type, const T& x) { } template <> -StringValue get_val(const FunctionContext::TypeDesc* type, const StringVal& x) { +StringRef get_val(const FunctionContext::TypeDesc* type, const StringVal& x) { DCHECK(!x.is_null); - return StringValue::from_string_val(x); + return StringRef(x); } template <> @@ -173,7 +173,7 @@ IN_FUNCTIONS(IntVal, int32_t, int_val) IN_FUNCTIONS(BigIntVal, int64_t, big_int_val) IN_FUNCTIONS(FloatVal, float, float_val) IN_FUNCTIONS(DoubleVal, double, double_val) -IN_FUNCTIONS(StringVal, StringValue, string_val) +IN_FUNCTIONS(StringVal, StringRef, string_val) IN_FUNCTIONS(DateTimeVal, DateTimeValue, datetime_val) IN_FUNCTIONS(DecimalV2Val, DecimalV2Value, decimalv2_val) IN_FUNCTIONS(LargeIntVal, __int128, large_int_val) diff --git a/be/src/exprs/operators.cpp b/be/src/exprs/operators.cpp index 625a1db346..16e903deb1 100644 --- a/be/src/exprs/operators.cpp +++ b/be/src/exprs/operators.cpp @@ -21,7 +21,7 @@ #include "exprs/operators.h" #include "runtime/datetime_value.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { @@ -106,7 +106,7 @@ void Operators::init() {} BINARY_PREDICATE_NUMERIC_FN(NAME, large_int_val, LargeIntVal, OP); \ BINARY_PREDICATE_NUMERIC_FN(NAME, float_val, FloatVal, OP); \ BINARY_PREDICATE_NUMERIC_FN(NAME, double_val, DoubleVal, OP); \ - BINARY_PREDICATE_NONNUMERIC_FN(NAME, string_val, string_val, StringVal, StringValue, OP); \ + BINARY_PREDICATE_NONNUMERIC_FN(NAME, string_val, string_val, StringVal, StringRef, OP); \ BINARY_PREDICATE_NONNUMERIC_FN(NAME, datetime_val, datetime_val, DateTimeVal, DateTimeValue, \ OP); diff --git a/be/src/exprs/rpc_fn.cpp b/be/src/exprs/rpc_fn.cpp index bb130415ae..01363922e8 100644 --- a/be/src/exprs/rpc_fn.cpp +++ b/be/src/exprs/rpc_fn.cpp @@ -151,20 +151,20 @@ Status RPCFn::call_internal(ExprContext* context, TupleRow* row, PFunctionCallRe case TYPE_STRING: case TYPE_CHAR: { ptype->set_id(PGenericType::STRING); - StringValue value = *reinterpret_cast(src_slot); - arg->add_string_value(value.ptr, value.len); + StringRef value = *reinterpret_cast(src_slot); + arg->add_string_value(value.data, value.size); break; } case TYPE_HLL: { ptype->set_id(PGenericType::HLL); - StringValue value = *reinterpret_cast(src_slot); - arg->add_string_value(value.ptr, value.len); + StringRef value = *reinterpret_cast(src_slot); + arg->add_string_value(value.data, value.size); break; } case TYPE_OBJECT: { ptype->set_id(PGenericType::BITMAP); - StringValue value = *reinterpret_cast(src_slot); - arg->add_string_value(value.ptr, value.len); + StringRef value = *reinterpret_cast(src_slot); + arg->add_string_value(value.data, value.size); break; } case TYPE_DECIMALV2: { diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index b2dc2351a6..4ba1bd271e 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -558,8 +558,8 @@ public: case TYPE_VARCHAR: case TYPE_HLL: case TYPE_STRING: { - // StringRef->StringValue - StringValue data = StringValue(const_cast(value.data), value.size); + // StringRef->StringRef + StringRef data = StringRef(value.data, value.size); insert(reinterpret_cast(&data)); break; } @@ -861,7 +861,7 @@ public: ObjectPool* pool) { auto& string_val_ref = column.stringval(); auto val_ptr = pool->add(new std::string(string_val_ref)); - StringValue string_val(const_cast(val_ptr->c_str()), val_ptr->length()); + StringRef string_val(val_ptr->c_str(), val_ptr->length()); set->insert(&string_val); }); break; @@ -994,8 +994,8 @@ public: auto& max_val_ref = minmax_filter->max_val().stringval(); auto min_val_ptr = _pool->add(new std::string(min_val_ref)); auto max_val_ptr = _pool->add(new std::string(max_val_ref)); - StringValue min_val(const_cast(min_val_ptr->c_str()), min_val_ptr->length()); - StringValue max_val(const_cast(max_val_ptr->c_str()), max_val_ptr->length()); + StringRef min_val(min_val_ptr->c_str(), min_val_ptr->length()); + StringRef max_val(max_val_ptr->c_str(), max_val_ptr->length()); return _context.minmax_func->assign(&min_val, &max_val); } default: @@ -1028,12 +1028,12 @@ public: case TYPE_VARCHAR: case TYPE_CHAR: case TYPE_STRING: { - StringValue* min_value = static_cast(_context.minmax_func->get_min()); - StringValue* max_value = static_cast(_context.minmax_func->get_max()); - auto min_val_ptr = _pool->add(new std::string(min_value->ptr)); - auto max_val_ptr = _pool->add(new std::string(max_value->ptr)); - StringValue min_val(const_cast(min_val_ptr->c_str()), min_val_ptr->length()); - StringValue max_val(const_cast(max_val_ptr->c_str()), max_val_ptr->length()); + StringRef* min_value = static_cast(_context.minmax_func->get_min()); + StringRef* max_value = static_cast(_context.minmax_func->get_max()); + auto min_val_ptr = _pool->add(new std::string(min_value->data)); + auto max_val_ptr = _pool->add(new std::string(max_value->data)); + StringRef min_val(min_val_ptr->c_str(), min_val_ptr->length()); + StringRef max_val(max_val_ptr->c_str(), max_val_ptr->length()); _context.minmax_func->assign(&min_val, &max_val); } default: @@ -1616,8 +1616,8 @@ void IRuntimeFilter::to_protobuf(PInFilter* filter) { case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_STRING: { - batch_copy(filter, it, [](PColumnValue* column, const StringValue* value) { - column->set_stringval(std::string(value->ptr, value->len)); + batch_copy(filter, it, [](PColumnValue* column, const StringRef* value) { + column->set_stringval(std::string(value->data, value->size)); }); return; } @@ -1725,12 +1725,12 @@ void IRuntimeFilter::to_protobuf(PMinMaxFilter* filter) { case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_STRING: { - const StringValue* min_string_value = reinterpret_cast(min_data); + const StringRef* min_string_value = reinterpret_cast(min_data); filter->mutable_min_val()->set_stringval( - std::string(min_string_value->ptr, min_string_value->len)); - const StringValue* max_string_value = reinterpret_cast(max_data); + std::string(min_string_value->data, min_string_value->size)); + const StringRef* max_string_value = reinterpret_cast(max_data); filter->mutable_max_val()->set_stringval( - std::string(max_string_value->ptr, max_string_value->len)); + std::string(max_string_value->data, max_string_value->size)); break; } default: { diff --git a/be/src/exprs/slot_ref.cpp b/be/src/exprs/slot_ref.cpp index ce4c3319e7..915704b66a 100644 --- a/be/src/exprs/slot_ref.cpp +++ b/be/src/exprs/slot_ref.cpp @@ -208,7 +208,7 @@ StringVal SlotRef::get_string_val(ExprContext* context, TupleRow* row) { return StringVal::null(); } StringVal result; - StringValue* sv = reinterpret_cast(t->get_slot(_slot_offset)); + StringRef* sv = reinterpret_cast(t->get_slot(_slot_offset)); sv->to_string_val(&result); return result; } diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp index 0db6741f05..0f9e3f5b19 100644 --- a/be/src/exprs/string_functions.cpp +++ b/be/src/exprs/string_functions.cpp @@ -421,8 +421,8 @@ IntVal StringFunctions::instr(FunctionContext* context, const StringVal& str, if (substr.len == 0) { return IntVal(1); } - StringValue str_sv = StringValue::from_string_val(str); - StringValue substr_sv = StringValue::from_string_val(substr); + StringRef str_sv = StringRef(str); + StringRef substr_sv = StringRef(substr); StringSearch search(&substr_sv); // Hive returns positions starting from 1. int loc = search.search(&str_sv); @@ -467,17 +467,17 @@ IntVal StringFunctions::locate_pos(FunctionContext* context, const StringVal& su if (start_pos.val <= 0 || start_pos.val > str.len || start_pos.val > char_len) { return IntVal(0); } - StringValue substr_sv = StringValue::from_string_val(substr); + StringRef substr_sv = StringRef(substr); StringSearch search(&substr_sv); // Input start_pos.val starts from 1. - StringValue adjusted_str(reinterpret_cast(str.ptr) + index[start_pos.val - 1], - str.len - index[start_pos.val - 1]); + StringRef adjusted_str(reinterpret_cast(str.ptr) + index[start_pos.val - 1], + str.len - index[start_pos.val - 1]); int32_t match_pos = search.search(&adjusted_str); if (match_pos >= 0) { // Hive returns the position in the original string starting from 1. size_t char_len = 0; for (size_t i = 0, char_size = 0; i < match_pos; i += char_size) { - char_size = UTF8_BYTE_LENGTH[(unsigned char)(adjusted_str.ptr)[i]]; + char_size = UTF8_BYTE_LENGTH[(unsigned char)(adjusted_str.data)[i]]; ++char_len; } match_pos = char_len; @@ -735,14 +735,14 @@ IntVal StringFunctions::find_in_set(FunctionContext* context, const StringVal& s int32_t token_index = 1; int32_t start = 0; int32_t end; - StringValue str_sv = StringValue::from_string_val(str); + StringRef str_sv = StringRef(str); do { end = start; // Position end. while (end < str_set.len && str_set.ptr[end] != ',') { ++end; } - StringValue token(reinterpret_cast(str_set.ptr) + start, end - start); + StringRef token(reinterpret_cast(str_set.ptr) + start, end - start); if (str_sv.eq(token)) { return IntVal(token_index); } @@ -767,7 +767,7 @@ void StringFunctions::parse_url_prepare(FunctionContext* ctx, return; } UrlParser::UrlPart* url_part = new UrlParser::UrlPart; - *url_part = UrlParser::get_url_part(StringValue::from_string_val(*part)); + *url_part = UrlParser::get_url_part(StringRef(*part)); if (*url_part == UrlParser::INVALID) { std::stringstream ss; ss << "Invalid URL part: " << AnyValUtil::to_string(*part) << std::endl @@ -793,11 +793,11 @@ StringVal StringFunctions::parse_url(FunctionContext* ctx, const StringVal& url, url_part = *reinterpret_cast(state); } else { DCHECK(!ctx->is_arg_constant(1)); - url_part = UrlParser::get_url_part(StringValue::from_string_val(newPart)); + url_part = UrlParser::get_url_part(StringRef(newPart)); } - StringValue result; - if (!UrlParser::parse_url(StringValue::from_string_val(url), url_part, &result)) { + StringRef result; + if (!UrlParser::parse_url(StringRef(url), url_part, &result)) { // url is malformed, or url_part is invalid. if (url_part == UrlParser::INVALID) { std::stringstream ss; @@ -836,12 +836,11 @@ StringVal StringFunctions::parse_url_key(FunctionContext* ctx, const StringVal& url_part = *reinterpret_cast(state); } else { DCHECK(!ctx->is_arg_constant(1)); - url_part = UrlParser::get_url_part(StringValue::from_string_val(part)); + url_part = UrlParser::get_url_part(StringRef(part)); } - StringValue result; - if (!UrlParser::parse_url_key(StringValue::from_string_val(url), url_part, - StringValue::from_string_val(key), &result)) { + StringRef result; + if (!UrlParser::parse_url_key(StringRef(url), url_part, StringRef(key), &result)) { // url is malformed, or url_part is invalid. if (url_part == UrlParser::INVALID) { std::stringstream ss; diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h index ec3e7c25b0..0ee17334cd 100644 --- a/be/src/exprs/string_functions.h +++ b/be/src/exprs/string_functions.h @@ -28,7 +28,7 @@ #include #include "gutil/strings/numbers.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h index 5a397b658f..f477ddb22a 100644 --- a/be/src/olap/aggregate_func.h +++ b/be/src/olap/aggregate_func.h @@ -24,9 +24,9 @@ #include "runtime/datetime_value.h" #include "runtime/decimalv2_value.h" #include "runtime/mem_pool.h" -#include "runtime/string_value.h" #include "util/bitmap_value.h" #include "util/quantile_state.h" +#include "vec/common/string_ref.h" namespace doris { diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index ec2be5b609..26d97a27ed 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -220,8 +220,8 @@ public: bool evaluate_and(const segment_v2::BloomFilter* bf) const override { if constexpr (PT == PredicateType::EQ) { - if constexpr (std::is_same_v) { - return bf->test_bytes(_value.ptr, _value.len); + if constexpr (std::is_same_v) { + return bf->test_bytes(_value.data, _value.size); } else if constexpr (Type == TYPE_DATE) { return bf->test_bytes(const_cast(reinterpret_cast(&_value)), sizeof(uint24_t)); @@ -254,7 +254,7 @@ public: .get_data(); if (nested_column.is_column_dictionary()) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { auto* dict_column_ptr = vectorized::check_and_get_column( nested_column); @@ -266,7 +266,7 @@ public: _base_loop_vec(size, flags, null_map.data(), data_array, dict_code); } else { - LOG(FATAL) << "column_dictionary must use StringValue predicate."; + LOG(FATAL) << "column_dictionary must use StringRef predicate."; } } else { auto* data_array = reinterpret_cast) { + if constexpr (std::is_same_v) { auto* dict_column_ptr = vectorized::check_and_get_column(column); auto dict_code = _is_range() ? dict_column_ptr->find_code_by_bound( @@ -288,7 +288,7 @@ public: _base_loop_vec(size, flags, nullptr, data_array, dict_code); } else { - LOG(FATAL) << "column_dictionary must use StringValue predicate."; + LOG(FATAL) << "column_dictionary must use StringRef predicate."; } } else { auto* data_array = @@ -457,7 +457,7 @@ private: void _base_evaluate_bit(const vectorized::IColumn* column, const uint8_t* null_map, const uint16_t* sel, uint16_t size, bool* flags) const { if (column->is_column_dictionary()) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { auto* dict_column_ptr = vectorized::check_and_get_column(column); auto* data_array = dict_column_ptr->get_data().data(); @@ -467,7 +467,7 @@ private: _base_loop_bit(sel, size, flags, null_map, data_array, dict_code); } else { - LOG(FATAL) << "column_dictionary must use StringValue predicate."; + LOG(FATAL) << "column_dictionary must use StringRef predicate."; } } else { auto* data_array = @@ -503,7 +503,7 @@ private: uint16_t _base_evaluate(const vectorized::IColumn* column, const uint8_t* null_map, uint16_t* sel, uint16_t size) const { if (column->is_column_dictionary()) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { auto* dict_column_ptr = vectorized::check_and_get_column(column); auto* data_array = dict_column_ptr->get_data().data(); @@ -513,7 +513,7 @@ private: return _base_loop(sel, size, null_map, data_array, dict_code); } else { - LOG(FATAL) << "column_dictionary must use StringValue predicate."; + LOG(FATAL) << "column_dictionary must use StringRef predicate."; return 0; } } else { diff --git a/be/src/olap/hll.cpp b/be/src/olap/hll.cpp index bc2ba3de39..7c234d1703 100644 --- a/be/src/olap/hll.cpp +++ b/be/src/olap/hll.cpp @@ -21,8 +21,8 @@ #include #include "common/logging.h" -#include "runtime/string_value.h" #include "util/coding.h" +#include "vec/common/string_ref.h" using std::map; using std::nothrow; diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index e85b1280f9..5a0990cc21 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -31,21 +31,15 @@ #include "olap/wrapper_field.h" #include "runtime/define_primitive_type.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" #include "runtime/type_limit.h" #include "uint24.h" #include "vec/columns/column_dictionary.h" +#include "vec/common/string_ref.h" #include "vec/core/types.h" -// for string value template <> -struct std::hash { - uint64_t operator()(const doris::StringValue& rhs) const { return hash_value(rhs); } -}; - -template <> -struct std::equal_to { - bool operator()(const doris::StringValue& lhs, const doris::StringValue& rhs) const { +struct std::equal_to { + bool operator()(const doris::StringRef& lhs, const doris::StringRef& rhs) const { return lhs == rhs; } }; @@ -122,13 +116,14 @@ public: auto values = ((HybridSetType*)hybrid_set.get())->get_inner_set(); if constexpr (is_string_type(Type)) { - for (auto& value : *values) { - StringValue sv = {value.data(), int(value.size())}; + // values' type is "phmap::flat_hash_set" + for (const std::string& value : *values) { + StringRef sv = value; if constexpr (Type == TYPE_CHAR) { _temp_datas.push_back(""); _temp_datas.back().resize(std::max(char_length, value.size())); memcpy(_temp_datas.back().data(), value.data(), value.size()); - sv = {_temp_datas.back().data(), int(_temp_datas.back().size())}; + sv = _temp_datas.back(); } _values->insert(sv); } @@ -346,8 +341,8 @@ public: bool evaluate_and(const segment_v2::BloomFilter* bf) const override { if constexpr (PT == PredicateType::IN_LIST) { for (auto value : *_values) { - if constexpr (std::is_same_v) { - if (bf->test_bytes(value.ptr, value.len)) { + if constexpr (std::is_same_v) { + if (bf->test_bytes(value.data, value.size)) { return true; } } else if constexpr (Type == TYPE_DATE) { @@ -388,7 +383,7 @@ private: uint16_t new_size = 0; if (column->is_column_dictionary()) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { auto* nested_col_ptr = vectorized::check_and_get_column< vectorized::ColumnDictionary>(column); auto& data_array = nested_col_ptr->get_data(); @@ -427,7 +422,7 @@ private: } } } else { - LOG(FATAL) << "column_dictionary must use StringValue predicate."; + LOG(FATAL) << "column_dictionary must use StringRef predicate."; } } else { auto* nested_col_ptr = vectorized::check_and_get_column< @@ -467,7 +462,7 @@ private: const vectorized::PaddedPODArray* null_map, const uint16_t* sel, uint16_t size, bool* flags) const { if (column->is_column_dictionary()) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { auto* nested_col_ptr = vectorized::check_and_get_column< vectorized::ColumnDictionary>(column); auto& data_array = nested_col_ptr->get_data(); @@ -503,7 +498,7 @@ private: } } } else { - LOG(FATAL) << "column_dictionary must use StringValue predicate."; + LOG(FATAL) << "column_dictionary must use StringRef predicate."; } } else { auto* nested_col_ptr = vectorized::check_and_get_column< diff --git a/be/src/olap/like_column_predicate.cpp b/be/src/olap/like_column_predicate.cpp index 4da3576425..cc5eb83a0b 100644 --- a/be/src/olap/like_column_predicate.cpp +++ b/be/src/olap/like_column_predicate.cpp @@ -18,8 +18,8 @@ #include "olap/like_column_predicate.h" #include "olap/field.h" -#include "runtime/string_value.hpp" #include "udf/udf.h" +#include "vec/common/string_ref.h" namespace doris { @@ -74,11 +74,11 @@ uint16_t LikeColumnPredicate::evaluate(const vectorized::IColumn& for (uint16_t i = 0; i != size; i++) { uint16_t idx = sel[i]; sel[new_size] = idx; - StringValue cell_value = nested_col_ptr->get_shrink_value(data_array[idx]); + StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[idx]); unsigned char flag = 0; (_state->scalar_function)( const_cast(&_like_state), - StringRef(cell_value.ptr, cell_value.len), pattern, &flag); + StringRef(cell_value.data, cell_value.size), pattern, &flag); new_size += _opposite ^ flag; } } else { @@ -90,11 +90,11 @@ uint16_t LikeColumnPredicate::evaluate(const vectorized::IColumn& continue; } - StringValue cell_value = nested_col_ptr->get_shrink_value(data_array[idx]); + StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[idx]); unsigned char flag = 0; (_state->scalar_function)( const_cast(&_like_state), - StringRef(cell_value.ptr, cell_value.len), pattern, &flag); + StringRef(cell_value.data, cell_value.size), pattern, &flag); new_size += _opposite ^ flag; } } @@ -120,11 +120,11 @@ uint16_t LikeColumnPredicate::evaluate(const vectorized::IColumn& continue; } - StringValue cell_value = str_col->get_data()[idx]; + StringRef cell_value = str_col->get_data()[idx]; unsigned char flag = 0; (_state->scalar_function)( const_cast(&_like_state), - StringRef(cell_value.ptr, cell_value.len), pattern, &flag); + StringRef(cell_value.data, cell_value.size), pattern, &flag); new_size += _opposite ^ flag; } } @@ -137,11 +137,11 @@ uint16_t LikeColumnPredicate::evaluate(const vectorized::IColumn& for (uint16_t i = 0; i != size; i++) { uint16_t idx = sel[i]; sel[new_size] = idx; - StringValue cell_value = nested_col_ptr->get_shrink_value(data_array[idx]); + StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[idx]); unsigned char flag = 0; (_state->scalar_function)( const_cast(&_like_state), - StringRef(cell_value.ptr, cell_value.len), pattern, &flag); + StringRef(cell_value.data, cell_value.size), pattern, &flag); new_size += _opposite ^ flag; } } else { diff --git a/be/src/olap/like_column_predicate.h b/be/src/olap/like_column_predicate.h index 2be7980103..00d7f0f848 100644 --- a/be/src/olap/like_column_predicate.h +++ b/be/src/olap/like_column_predicate.h @@ -18,9 +18,9 @@ #include "exprs/like_predicate.h" #include "olap/column_predicate.h" -#include "runtime/string_value.h" #include "udf/udf.h" #include "vec/columns/column_dictionary.h" +#include "vec/common/string_ref.h" #include "vec/core/types.h" #include "vec/functions/like.h" @@ -48,7 +48,12 @@ public: bool* flags) const override; std::string get_search_str() const override { - return std::string(reinterpret_cast(pattern.ptr), pattern.len); + if constexpr (std::is_same_v) { + return std::string(reinterpret_cast(pattern.data), pattern.size); + } else if constexpr (std::is_same_v) { + return std::string(reinterpret_cast(pattern.ptr), pattern.len); + } + DCHECK(false); } bool is_opposite() const { return _opposite; } @@ -86,18 +91,18 @@ private: continue; } - StringValue cell_value = nested_col_ptr->get_shrink_value(data_array[i]); + StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[i]); if constexpr (is_and) { unsigned char flag = 0; (_state->scalar_function)( const_cast(&_like_state), - StringRef(cell_value.ptr, cell_value.len), pattern, &flag); + StringRef(cell_value.data, cell_value.size), pattern, &flag); flags[i] &= _opposite ^ flag; } else { unsigned char flag = 0; (_state->scalar_function)( const_cast(&_like_state), - StringRef(cell_value.ptr, cell_value.len), pattern, &flag); + StringRef(cell_value.data, cell_value.size), pattern, &flag); flags[i] = _opposite ^ flag; } } @@ -110,18 +115,18 @@ private: vectorized::ColumnDictionary>(column); auto& data_array = nested_col_ptr->get_data(); for (uint16_t i = 0; i < size; i++) { - StringValue cell_value = nested_col_ptr->get_shrink_value(data_array[i]); + StringRef cell_value = nested_col_ptr->get_shrink_value(data_array[i]); if constexpr (is_and) { unsigned char flag = 0; (_state->scalar_function)( const_cast(&_like_state), - StringRef(cell_value.ptr, cell_value.len), pattern, &flag); + StringRef(cell_value.data, cell_value.size), pattern, &flag); flags[i] &= _opposite ^ flag; } else { unsigned char flag = 0; (_state->scalar_function)( const_cast(&_like_state), - StringRef(cell_value.ptr, cell_value.len), pattern, &flag); + StringRef(cell_value.data, cell_value.size), pattern, &flag); flags[i] = _opposite ^ flag; } } @@ -138,9 +143,9 @@ private: } std::string _origin; - // life time controlled by scan node + // lifetime controlled by scan node doris_udf::FunctionContext* _fn_ctx; - using PatternType = std::conditional_t; + using PatternType = std::conditional_t; using StateType = std::conditional_t; PatternType pattern; diff --git a/be/src/olap/null_predicate.cpp b/be/src/olap/null_predicate.cpp index 9c8d5ccd67..c0cc938be8 100644 --- a/be/src/olap/null_predicate.cpp +++ b/be/src/olap/null_predicate.cpp @@ -18,8 +18,8 @@ #include "olap/null_predicate.h" #include "olap/field.h" -#include "runtime/string_value.hpp" #include "vec/columns/column_nullable.h" +#include "vec/common/string_ref.h" using namespace doris::vectorized; diff --git a/be/src/olap/predicate_creator.h b/be/src/olap/predicate_creator.h index 5faad0268b..55e9a0a642 100644 --- a/be/src/olap/predicate_creator.h +++ b/be/src/olap/predicate_creator.h @@ -116,8 +116,8 @@ public: } private: - static StringValue convert(const TabletColumn& column, const std::string& condition, - MemPool* pool) { + static StringRef convert(const TabletColumn& column, const std::string& condition, + MemPool* pool) { size_t length = condition.length(); if constexpr (Type == TYPE_CHAR) { length = std::max(static_cast(column.length()), length); @@ -127,7 +127,7 @@ private: memset(buffer, 0, length); memory_copy(buffer, condition.data(), condition.length()); - return StringValue(buffer, length); + return {buffer, length}; } }; diff --git a/be/src/olap/rowset/segment_v2/bloom_filter.h b/be/src/olap/rowset/segment_v2/bloom_filter.h index 139ff3aea2..c235ec39b9 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter.h +++ b/be/src/olap/rowset/segment_v2/bloom_filter.h @@ -126,7 +126,7 @@ public: add_hash(code); } - bool test_bytes(char* buf, uint32_t size) const { + bool test_bytes(const char* buf, uint32_t size) const { if (buf == nullptr) { return *_has_null; } diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index c8d82d798f..316483033f 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -193,8 +193,8 @@ Status ColumnReader::next_batch_of_zone_map(size_t* n, vectorized::MutableColumn assert_cast(*dst).insert_default(); } else { if (is_string) { - auto sv = (StringValue*)max_value->cell_ptr(); - dst->insert_data(sv->ptr, sv->len); + auto sv = (StringRef*)max_value->cell_ptr(); + dst->insert_data(sv->data, sv->size); } else { dst->insert_many_fix_len_data(static_cast(max_value->cell_ptr()), 1); } @@ -205,8 +205,8 @@ Status ColumnReader::next_batch_of_zone_map(size_t* n, vectorized::MutableColumn assert_cast(*dst).insert_null_elements(size); } else { if (is_string) { - auto sv = (StringValue*)min_value->cell_ptr(); - dst->insert_many_data(sv->ptr, sv->len, size); + auto sv = (StringRef*)min_value->cell_ptr(); + dst->insert_many_data(sv->data, sv->size, size); } else { // TODO: the work may cause performance problem, opt latter for (int i = 0; i < size; ++i) { diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index cff316f6ac..402a88f2c5 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -32,8 +32,8 @@ #include "olap/rowset/segment_v2/inverted_index_desc.h" #include "olap/tablet_schema.h" #include "olap/utils.h" -#include "runtime/string_value.h" #include "util/time.h" +#include "vec/common/string_ref.h" namespace doris { namespace segment_v2 { @@ -110,7 +110,7 @@ Status FullTextIndexReader::query(const std::string& column_name, const void* qu InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type, roaring::Roaring* bit_map) { - std::string search_str = reinterpret_cast(query_value)->to_string(); + std::string search_str = reinterpret_cast(query_value)->to_string(); VLOG_DEBUG << column_name << " begin to load the fulltext index from clucene, query_str=" << search_str; std::unique_ptr query; @@ -210,9 +210,9 @@ Status StringTypeInvertedIndexReader::query(const std::string& column_name, cons InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type, roaring::Roaring* bit_map) { - const StringValue* search_query = reinterpret_cast(query_value); - auto act_len = strnlen(search_query->ptr, search_query->len); - std::string search_str(search_query->ptr, act_len); + const StringRef* search_query = reinterpret_cast(query_value); + auto act_len = strnlen(search_query->data, search_query->size); + std::string search_str(search_query->data, act_len); VLOG_DEBUG << "begin to query the inverted index from clucene" << ", column_name: " << column_name << ", search_str: " << search_str; std::wstring column_name_ws = std::wstring(column_name.begin(), column_name.end()); diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index dfc375bd24..4351fe1ee5 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -41,7 +41,6 @@ set(RUNTIME_FILES runtime_state.cpp runtime_filter_mgr.cpp runtime_predicate.cpp - string_value.cpp jsonb_value.cpp thread_context.cpp thread_resource_mgr.cpp diff --git a/be/src/runtime/collection_value.cpp b/be/src/runtime/collection_value.cpp index 8192e35dc7..fc9fb65a24 100644 --- a/be/src/runtime/collection_value.cpp +++ b/be/src/runtime/collection_value.cpp @@ -23,9 +23,9 @@ #include "common/utils.h" #include "runtime/mem_pool.h" #include "runtime/raw_value.h" -#include "runtime/string_value.h" #include "runtime/types.h" #include "util/mem_util.hpp" +#include "vec/common/string_ref.h" namespace doris { @@ -87,19 +87,19 @@ struct CollectionValueSubTypeTrait { template <> struct CollectionValueSubTypeTrait { - using CppType = StringValue; + using CppType = StringRef; using AnyValType = StringVal; }; template <> struct CollectionValueSubTypeTrait { - using CppType = StringValue; + using CppType = StringRef; using AnyValType = StringVal; }; template <> struct CollectionValueSubTypeTrait { - using CppType = StringValue; + using CppType = StringRef; using AnyValType = StringVal; }; @@ -186,43 +186,43 @@ struct ArrayIteratorFunctions : public GenericArrayIteratorFunctions {}; template struct ArrayIteratorFunctionsForString : public GenericArrayIteratorFunctions { - using CppType = StringValue; + using CppType = StringRef; using AnyValType = StringVal; static void shallow_set(void* item, const AnyVal* value) { const auto* src = static_cast(value); auto* dst = static_cast(item); - dst->ptr = convert_to(src->ptr); - dst->len = src->len; + dst->data = convert_to(src->ptr); + dst->size = src->len; } static void shallow_get(AnyVal* value, const void* item) { const auto* src = static_cast(item); auto* dst = static_cast(value); - dst->ptr = convert_to(src->ptr); - dst->len = src->len; + dst->ptr = convert_to(src->data); + dst->len = src->size; } static void self_deep_copy(void* item, const TypeDescriptor&, const GenMemFootprintFunc& gen_mem_footprint, bool convert_ptrs) { auto* string = static_cast(item); - if (!string->len) { + if (!string->size) { return; } - MemFootprint footprint = gen_mem_footprint(string->len); + MemFootprint footprint = gen_mem_footprint(string->size); int64_t offset = footprint.first; auto* copied_string = reinterpret_cast(footprint.second); - memory_copy(copied_string, string->ptr, string->len); - string->ptr = (convert_ptrs ? convert_to(offset) : copied_string); + memory_copy(copied_string, string->data, string->size); + string->data = (convert_ptrs ? convert_to(offset) : copied_string); } static void deserialize(void* item, const char* tuple_data, const TypeDescriptor& type_desc) { DCHECK((item != nullptr) && (tuple_data != nullptr)) << "item or tuple_data is nullptr"; auto* string_value = static_cast(item); - if (string_value->len) { - int64_t offset = convert_to(string_value->ptr); - string_value->ptr = convert_to(tuple_data + offset); + if (string_value->size) { + int64_t offset = convert_to(string_value->data); + string_value->data = convert_to(tuple_data + offset); } } static size_t get_byte_size(const void* item, const TypeDescriptor&) { - return static_cast(item)->len; + return static_cast(item)->size; } }; diff --git a/be/src/runtime/fold_constant_executor.cpp b/be/src/runtime/fold_constant_executor.cpp index a782024eb1..3fad048846 100644 --- a/be/src/runtime/fold_constant_executor.cpp +++ b/be/src/runtime/fold_constant_executor.cpp @@ -234,7 +234,7 @@ string FoldConstantExecutor::_get_result(void* src, size_t size, PrimitiveType s if constexpr (is_vec) { return std::string((char*)src, size); } - return (reinterpret_cast(src))->to_string(); + return (reinterpret_cast(src))->to_string(); } case TYPE_DATE: case TYPE_DATETIME: { diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index 779676a45d..a5af1569bc 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -21,7 +21,7 @@ #include "runtime/collection_value.h" #include "runtime/define_primitive_type.h" #include "runtime/jsonb_value.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { @@ -584,7 +584,7 @@ int get_slot_size(PrimitiveType type) { case TYPE_OBJECT: case TYPE_HLL: case TYPE_QUANTILE_STATE: - return sizeof(StringValue); + return sizeof(StringRef); case TYPE_JSONB: return sizeof(JsonBinaryValue); case TYPE_ARRAY: diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h index 5af7639290..96272edc71 100644 --- a/be/src/runtime/primitive_type.h +++ b/be/src/runtime/primitive_type.h @@ -32,7 +32,7 @@ class ColumnString; class DateTimeValue; class DecimalV2Value; -struct StringValue; +struct StringRef; struct JsonBinaryValue; PrimitiveType convert_type_to_primitive(FunctionContext::Type type); @@ -210,24 +210,24 @@ struct PrimitiveTypeTraits { }; template <> struct PrimitiveTypeTraits { - using CppType = StringValue; + using CppType = StringRef; using ColumnType = vectorized::ColumnString; }; template <> struct PrimitiveTypeTraits { - using CppType = StringValue; + using CppType = StringRef; using ColumnType = vectorized::ColumnString; }; template <> struct PrimitiveTypeTraits { - using CppType = StringValue; + using CppType = StringRef; using ColumnType = vectorized::ColumnString; }; template <> struct PrimitiveTypeTraits { - using CppType = StringValue; + using CppType = StringRef; using ColumnType = vectorized::ColumnString; }; diff --git a/be/src/runtime/raw_value.cpp b/be/src/runtime/raw_value.cpp index 67e70d4be2..7f2896ee19 100644 --- a/be/src/runtime/raw_value.cpp +++ b/be/src/runtime/raw_value.cpp @@ -40,7 +40,7 @@ void RawValue::print_value_as_bytes(const void* value, const TypeDescriptor& typ } const char* chars = reinterpret_cast(value); - const StringValue* string_val = nullptr; + const StringRef* string_val = nullptr; switch (type.type) { case TYPE_NULL: @@ -77,8 +77,8 @@ void RawValue::print_value_as_bytes(const void* value, const TypeDescriptor& typ case TYPE_HLL: case TYPE_CHAR: case TYPE_STRING: - string_val = reinterpret_cast(value); - stream->write(static_cast(string_val->ptr), string_val->len); + string_val = reinterpret_cast(value); + stream->write(const_cast(string_val->data), string_val->size); return; case TYPE_DATE: @@ -140,7 +140,7 @@ void RawValue::print_value(const void* value, const TypeDescriptor& type, int sc } std::string tmp; - const StringValue* string_val = nullptr; + const StringRef* string_val = nullptr; switch (type.type) { case TYPE_BOOLEAN: { @@ -177,8 +177,8 @@ void RawValue::print_value(const void* value, const TypeDescriptor& type, int sc case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_STRING: - string_val = reinterpret_cast(value); - tmp.assign(static_cast(string_val->ptr), string_val->len); + string_val = reinterpret_cast(value); + tmp.assign(const_cast(string_val->data), string_val->size); *stream << tmp; return; @@ -288,7 +288,7 @@ void RawValue::print_value(const void* value, const TypeDescriptor& type, int sc std::stringstream out; out.precision(ASCII_PRECISION); - const StringValue* string_val = nullptr; + const StringRef* string_val = nullptr; std::string tmp; bool val = false; @@ -305,12 +305,12 @@ void RawValue::print_value(const void* value, const TypeDescriptor& type, int sc case TYPE_HLL: case TYPE_QUANTILE_STATE: case TYPE_STRING: { - string_val = reinterpret_cast(value); + string_val = reinterpret_cast(value); std::stringstream ss; - ss << "ptr:" << (void*)string_val->ptr << " len:" << string_val->len; + ss << "ptr:" << (void*)string_val->data << " len:" << string_val->size; tmp = ss.str(); - if (string_val->len <= 1000) { - tmp.assign(static_cast(string_val->ptr), string_val->len); + if (string_val->size <= 1000) { + tmp.assign(const_cast(string_val->data), string_val->size); } str->swap(tmp); return; @@ -416,15 +416,15 @@ void RawValue::write(const void* value, void* dst, const TypeDescriptor& type, M case TYPE_VARCHAR: case TYPE_CHAR: case TYPE_STRING: { - const StringValue* src = reinterpret_cast(value); - StringValue* dest = reinterpret_cast(dst); - dest->len = src->len; + const StringRef* src = reinterpret_cast(value); + StringRef* dest = reinterpret_cast(dst); + dest->size = src->size; if (pool != nullptr) { - dest->ptr = reinterpret_cast(pool->allocate(dest->len)); - memcpy(dest->ptr, src->ptr, dest->len); + dest->data = reinterpret_cast(pool->allocate(dest->size)); + memcpy(const_cast(dest->data), src->data, dest->size); } else { - dest->ptr = src->ptr; + dest->data = src->data; } break; @@ -508,12 +508,12 @@ void RawValue::write(const void* value, const TypeDescriptor& type, void* dst, u case TYPE_CHAR: case TYPE_STRING: { DCHECK(buf != nullptr); - const StringValue* src = reinterpret_cast(value); - StringValue* dest = reinterpret_cast(dst); - dest->len = src->len; - dest->ptr = reinterpret_cast(*buf); - memcpy(dest->ptr, src->ptr, dest->len); - *buf += dest->len; + const StringRef* src = reinterpret_cast(value); + StringRef* dest = reinterpret_cast(dst); + dest->size = src->size; + dest->data = reinterpret_cast(*buf); + memcpy(const_cast(dest->data), src->data, dest->size); + *buf += dest->size; break; } @@ -550,8 +550,8 @@ void RawValue::write(const void* value, Tuple* tuple, const SlotDescriptor* slot } int RawValue::compare(const void* v1, const void* v2, const TypeDescriptor& type) { - const StringValue* string_value1; - const StringValue* string_value2; + const StringRef* string_value1; + const StringRef* string_value2; const DateTimeValue* ts_value1; const DateTimeValue* ts_value2; float f1 = 0; @@ -610,8 +610,8 @@ int RawValue::compare(const void* v1, const void* v2, const TypeDescriptor& type case TYPE_VARCHAR: case TYPE_HLL: case TYPE_STRING: - string_value1 = reinterpret_cast(v1); - string_value2 = reinterpret_cast(v2); + string_value1 = reinterpret_cast(v1); + string_value2 = reinterpret_cast(v2); return string_value1->compare(*string_value2); case TYPE_DATE: diff --git a/be/src/runtime/raw_value.h b/be/src/runtime/raw_value.h index ceb722e847..3e1e05a496 100644 --- a/be/src/runtime/raw_value.h +++ b/be/src/runtime/raw_value.h @@ -24,10 +24,10 @@ #include "common/consts.h" #include "common/logging.h" -#include "runtime/string_value.h" #include "runtime/types.h" #include "util/hash_util.hpp" #include "util/types.h" +#include "vec/common/string_ref.h" namespace doris { @@ -123,8 +123,8 @@ public: }; inline bool RawValue::lt(const void* v1, const void* v2, const TypeDescriptor& type) { - const StringValue* string_value1; - const StringValue* string_value2; + const StringRef* string_value1; + const StringRef* string_value2; switch (type.type) { case TYPE_BOOLEAN: @@ -152,8 +152,8 @@ inline bool RawValue::lt(const void* v1, const void* v2, const TypeDescriptor& t case TYPE_VARCHAR: case TYPE_HLL: case TYPE_STRING: - string_value1 = reinterpret_cast(v1); - string_value2 = reinterpret_cast(v2); + string_value1 = reinterpret_cast(v1); + string_value2 = reinterpret_cast(v2); return string_value1->lt(*string_value2); case TYPE_DATE: @@ -195,8 +195,8 @@ inline bool RawValue::lt(const void* v1, const void* v2, const TypeDescriptor& t }; } inline bool RawValue::eq(const void* v1, const void* v2, const TypeDescriptor& type) { - const StringValue* string_value1; - const StringValue* string_value2; + const StringRef* string_value1; + const StringRef* string_value2; switch (type.type) { case TYPE_BOOLEAN: @@ -224,8 +224,8 @@ inline bool RawValue::eq(const void* v1, const void* v2, const TypeDescriptor& t case TYPE_VARCHAR: case TYPE_HLL: case TYPE_STRING: - string_value1 = reinterpret_cast(v1); - string_value2 = reinterpret_cast(v2); + string_value1 = reinterpret_cast(v1); + string_value2 = reinterpret_cast(v2); return string_value1->eq(*string_value2); case TYPE_DATE: @@ -284,8 +284,8 @@ inline uint32_t RawValue::get_hash_value(const void* v, const PrimitiveType& typ case TYPE_CHAR: case TYPE_HLL: case TYPE_STRING: { - const StringValue* string_value = reinterpret_cast(v); - return HashUtil::hash(string_value->ptr, string_value->len, seed); + const StringRef* string_value = reinterpret_cast(v); + return HashUtil::hash(string_value->data, string_value->size, seed); } case TYPE_BOOLEAN: { @@ -353,8 +353,8 @@ inline uint32_t RawValue::get_hash_value_fvn(const void* v, const PrimitiveType& case TYPE_HLL: case TYPE_OBJECT: case TYPE_STRING: { - const StringValue* string_value = reinterpret_cast(v); - return HashUtil::fnv_hash(string_value->ptr, string_value->len, seed); + const StringRef* string_value = reinterpret_cast(v); + return HashUtil::fnv_hash(string_value->data, string_value->size, seed); } case TYPE_BOOLEAN: { @@ -421,20 +421,20 @@ inline uint32_t RawValue::zlib_crc32(const void* v, const TypeDescriptor& type, case TYPE_VARCHAR: case TYPE_HLL: case TYPE_STRING: { - const StringValue* string_value = reinterpret_cast(v); - return HashUtil::zlib_crc_hash(string_value->ptr, string_value->len, seed); + const StringRef* string_value = reinterpret_cast(v); + return HashUtil::zlib_crc_hash(string_value->data, string_value->size, seed); } case TYPE_CHAR: { // TODO(zc): ugly, use actual value to compute hash value - const StringValue* string_value = reinterpret_cast(v); + const StringRef* string_value = reinterpret_cast(v); int len = 0; - while (len < string_value->len) { - if (string_value->ptr[len] == '\0') { + while (len < string_value->size) { + if (string_value->data[len] == '\0') { break; } len++; } - return HashUtil::zlib_crc_hash(string_value->ptr, len, seed); + return HashUtil::zlib_crc_hash(string_value->data, len, seed); } case TYPE_BOOLEAN: case TYPE_TINYINT: diff --git a/be/src/runtime/string_buffer.hpp b/be/src/runtime/string_buffer.hpp index e3893138c2..e54473d4e9 100644 --- a/be/src/runtime/string_buffer.hpp +++ b/be/src/runtime/string_buffer.hpp @@ -18,24 +18,24 @@ #pragma once #include "runtime/mem_pool.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { // Dynamic-sizable string (similar to std::string) but without as many // copies and allocations. -// StringBuffer wraps a StringValue object with a pool and memory buffer length. +// StringBuffer wraps a StringRef object with a pool and memory buffer length. // It supports a subset of the std::string functionality but will only allocate // bigger string buffers as necessary. std::string tries to be immutable and will // reallocate very often. std::string should be avoided in all hot paths. class StringBuffer { public: // C'tor for StringBuffer. Memory backing the string will be allocated from - // the pool as necessary. Can optionally be initialized from a StringValue. - StringBuffer(MemPool* pool, StringValue* str) : _pool(pool), _buffer_size(0) { + // the pool as necessary. Can optionally be initialized from a StringRef. + StringBuffer(MemPool* pool, StringRef* str) : _pool(pool), _buffer_size(0) { if (str != NULL) { _string_value = *str; - _buffer_size = str->len; + _buffer_size = str->size; } } @@ -45,14 +45,14 @@ public: // append 'str' to the current string, allocating a new buffer as necessary. void append(const char* str, int len) { - int new_len = len + _string_value.len; + int new_len = len + _string_value.size; if (new_len > _buffer_size) { grow_buffer(new_len); } - memcpy(_string_value.ptr + _string_value.len, str, len); - _string_value.len = new_len; + memcpy(const_cast(_string_value.data) + _string_value.size, str, len); + _string_value.size = new_len; } // TODO: switch everything to uint8_t? @@ -64,23 +64,23 @@ public: append(str, len); } - // clear the underlying StringValue. The allocated buffer can be reused. - void clear() { _string_value.len = 0; } + // clear the underlying StringRef. The allocated buffer can be reused. + void clear() { _string_value.size = 0; } - // Clears the underlying buffer and StringValue + // Clears the underlying buffer and StringRef void reset() { - _string_value.len = 0; + _string_value.size = 0; _buffer_size = 0; } // Returns whether the current string is empty - bool empty() const { return _string_value.len == 0; } + bool empty() const { return _string_value.size == 0; } // Returns the length of the current string - int size() const { return _string_value.len; } + int size() const { return _string_value.size; } - // Returns the underlying StringValue - const StringValue& str() const { return _string_value; } + // Returns the underlying StringRef + const StringRef& str() const { return _string_value; } // Returns the buffer size int buffer_size() const { return _buffer_size; } @@ -92,16 +92,16 @@ private: void grow_buffer(int new_len) { char* new_buffer = reinterpret_cast(_pool->allocate(new_len)); - if (_string_value.len > 0) { - memcpy(new_buffer, _string_value.ptr, _string_value.len); + if (_string_value.size > 0) { + memcpy(new_buffer, _string_value.data, _string_value.size); } - _string_value.ptr = new_buffer; + _string_value.data = new_buffer; _buffer_size = new_len; } MemPool* _pool; - StringValue _string_value; + StringRef _string_value; int _buffer_size; }; diff --git a/be/src/runtime/string_search.hpp b/be/src/runtime/string_search.hpp index 6565f516b4..a7d00673aa 100644 --- a/be/src/runtime/string_search.hpp +++ b/be/src/runtime/string_search.hpp @@ -22,42 +22,37 @@ #include #include "common/logging.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" #include "vec/common/volnitsky.h" namespace doris { class StringSearch { public: - virtual ~StringSearch() {} + virtual ~StringSearch() = default; StringSearch() : _pattern(nullptr) {} - StringSearch(const StringValue* pattern) { set_pattern(pattern); } - - void set_pattern(const StringValue* pattern) { - _pattern = pattern; - _vol_searcher.reset(new Volnitsky(pattern->ptr, pattern->len)); - } + StringSearch(const StringRef* pattern) { set_pattern(pattern); } void set_pattern(const StringRef* pattern) { - _pattern = reinterpret_cast(pattern); + _pattern = pattern; _vol_searcher.reset(new Volnitsky(pattern->data, pattern->size)); } // search for this pattern in str. // Returns the offset into str if the pattern exists // Returns -1 if the pattern is not found - int search(const StringValue* str) const { - auto it = search(str->ptr, str->len); - if (it == str->ptr + str->len) { + int search(const StringRef* str) const { + auto it = search(str->data, str->size); + if (it == str->data + str->size) { return -1; } else { - return it - str->ptr; + return it - str->data; } } int search(const StringRef& str) const { - auto it = search(const_cast(str.data), str.size); + auto it = search(str.data, str.size); if (it == str.data + str.size) { return -1; } else { @@ -68,18 +63,18 @@ public: // search for this pattern in str. // Returns the offset into str if the pattern exists // Returns str+len if the pattern is not found - const char* search(char* str, size_t len) const { - if (!str || !_pattern || _pattern->len == 0) { + const char* search(const char* str, size_t len) const { + if (!str || !_pattern || _pattern->size == 0) { return str + len; } return _vol_searcher->search(str, len); } - inline size_t get_pattern_length() { return _pattern ? _pattern->len : 0; } + inline size_t get_pattern_length() { return _pattern ? _pattern->size : 0; } private: - const StringValue* _pattern; + const StringRef* _pattern; std::unique_ptr _vol_searcher; }; diff --git a/be/src/runtime/string_value.cpp b/be/src/runtime/string_value.cpp deleted file mode 100644 index aa16783545..0000000000 --- a/be/src/runtime/string_value.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/string-value.cc -// and modified by Doris - -#include "runtime/string_value.h" - -#include - -#include "exec/olap_utils.h" - -namespace doris { - -std::string StringValue::debug_string() const { - return std::string(ptr, len); -} - -std::string StringValue::to_string() const { - return std::string(ptr, len); -} - -std::ostream& operator<<(std::ostream& os, const StringValue& string_value) { - return os << string_value.debug_string(); -} - -std::size_t operator-(const StringValue& v1, const StringValue& v2) { - return 0; -} - -constexpr char StringValue::MIN_CHAR = 0x00; - -constexpr char StringValue::MAX_CHAR = 0xff; - -StringValue StringValue::min_string_val() { - return StringValue((char*)(&StringValue::MIN_CHAR), 0); -} - -StringValue StringValue::max_string_val() { - return StringValue((char*)(&StringValue::MAX_CHAR), 1); -} - -} // namespace doris diff --git a/be/src/runtime/string_value.h b/be/src/runtime/string_value.h deleted file mode 100644 index 4878ca31e4..0000000000 --- a/be/src/runtime/string_value.h +++ /dev/null @@ -1,211 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/string-value.h -// and modified by Doris - -#pragma once - -#include - -#include "udf/udf.h" -#include "util/cpu_info.h" -#include "util/hash_util.hpp" -#include "vec/common/string_ref.h" -#ifdef __SSE4_2__ -#include "util/sse_util.hpp" -#endif - -namespace doris { - -// Compare two strings using sse4.2 intrinsics if they are available. This code assumes -// that the trivial cases are already handled (i.e. one string is empty). -// Returns: -// < 0 if s1 < s2 -// 0 if s1 == s2 -// > 0 if s1 > s2 -// The SSE code path is just under 2x faster than the non-sse code path. -// - s1/n1: ptr/len for the first string -// - s2/n2: ptr/len for the second string -// - len: min(n1, n2) - this can be more cheaply passed in by the caller -static inline int string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2, - int64_t len) { - DCHECK_EQ(len, std::min(n1, n2)); -#ifdef __SSE4_2__ - while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) { - __m128i xmm0 = _mm_loadu_si128(reinterpret_cast(s1)); - __m128i xmm1 = _mm_loadu_si128(reinterpret_cast(s2)); - int chars_match = _mm_cmpestri(xmm0, sse_util::CHARS_PER_128_BIT_REGISTER, xmm1, - sse_util::CHARS_PER_128_BIT_REGISTER, sse_util::STRCMP_MODE); - if (chars_match != sse_util::CHARS_PER_128_BIT_REGISTER) { - return (unsigned char)s1[chars_match] - (unsigned char)s2[chars_match]; - } - len -= sse_util::CHARS_PER_128_BIT_REGISTER; - s1 += sse_util::CHARS_PER_128_BIT_REGISTER; - s2 += sse_util::CHARS_PER_128_BIT_REGISTER; - } -#endif - unsigned char u1, u2; - while (len-- > 0) { - u1 = (unsigned char)*s1++; - u2 = (unsigned char)*s2++; - if (u1 != u2) return u1 - u2; - if (u1 == '\0') return n1 - n2; - } - - return n1 - n2; -} - -// The format of a string-typed slot. -// The returned StringValue of all functions that return StringValue -// shares its buffer the parent. -struct StringValue { - const static char MIN_CHAR; - const static char MAX_CHAR; - - static const int MAX_LENGTH = (1 << 30); - // TODO: change ptr to an offset relative to a contiguous memory block, - // so that we can send row batches between nodes without having to swizzle - // pointers - // NOTE: This struct should keep the same memory layout with Slice, otherwise - // it will lead to BE crash. - // TODO(zc): we should unify this struct with Slice some day. - char* ptr; - size_t len; - - StringValue(char* ptr, int len) : ptr(ptr), len(len) {} - StringValue(const char* ptr, int len) : ptr(const_cast(ptr)), len(len) {} - StringValue() : ptr(nullptr), len(0) {} - StringValue(const StringRef& str) : ptr(const_cast(str.data)), len(str.size) {} - - /// Construct a StringValue from 's'. 's' must be valid for as long as - /// this object is valid. - explicit StringValue(const std::string& s) : ptr(const_cast(s.c_str())), len(s.size()) { - DCHECK_LE(len, MAX_LENGTH); - } - - void replace(char* ptr, int len) { - this->ptr = ptr; - this->len = len; - } - - // Byte-by-byte comparison. Returns: - // this < other: -1 - // this == other: 0 - // this > other: 1 - int compare(const StringValue& other) const { - int l = std::min(len, other.len); - - if (l == 0) { - if (len == other.len) { - return 0; - } else if (len == 0) { - return -1; - } else { - DCHECK_EQ(other.len, 0); - return 1; - } - } - - return string_compare(this->ptr, this->len, other.ptr, other.len, l); - } - - // == - bool eq(const StringValue& other) const { - if (this->len != other.len) { - return false; - } - -#if defined(__SSE2__) - return memequalSSE2Wide(this->ptr, other.ptr, this->len); -#endif - - return string_compare(this->ptr, this->len, other.ptr, other.len, this->len) == 0; - } - - bool operator==(const StringValue& other) const { return eq(other); } - // != - bool ne(const StringValue& other) const { return !eq(other); } - // <= - bool le(const StringValue& other) const { return compare(other) <= 0; } - // >= - bool ge(const StringValue& other) const { return compare(other) >= 0; } - // < - bool lt(const StringValue& other) const { return compare(other) < 0; } - // > - bool gt(const StringValue& other) const { return compare(other) > 0; } - - bool operator!=(const StringValue& other) const { return ne(other); } - - bool operator<=(const StringValue& other) const { return le(other); } - - bool operator>=(const StringValue& other) const { return ge(other); } - - bool operator<(const StringValue& other) const { return lt(other); } - - bool operator>(const StringValue& other) const { return gt(other); } - - std::string debug_string() const; - - std::string to_string() const; - - // Returns the substring starting at start_pos until the end of string. - StringValue substring(int start_pos) const; - - // Returns the substring starting at start_pos with given length. - // If new_len < 0 then the substring from start_pos to end of string is returned. - StringValue substring(int start_pos, int new_len) const; - - // Trims leading and trailing spaces. - StringValue trim() const; - - // Find the first position char of appear, return -1 if not found - int64_t find_first_of(char c) const; - - void to_string_val(doris_udf::StringVal* sv) const { - *sv = doris_udf::StringVal(reinterpret_cast(ptr), len); - } - - static StringValue from_string_val(const doris_udf::StringVal& sv) { - return StringValue(reinterpret_cast(sv.ptr), sv.len); - } - - static StringValue min_string_val(); - - static StringValue max_string_val(); - - struct Comparator { - bool operator()(const StringValue& a, const StringValue& b) const { - return a.compare(b) < 0; - } - }; - - struct HashOfStringValue { - size_t operator()(const StringValue& v) const { return HashUtil::hash(v.ptr, v.len, 0); } - }; -}; - -// This function must be called 'hash_value' to be picked up by boost. -inline std::size_t hash_value(const StringValue& v) { - return HashUtil::hash(v.ptr, v.len, 0); -} - -std::ostream& operator<<(std::ostream& os, const StringValue& string_value); - -std::size_t operator-(const StringValue& v1, const StringValue& v2); - -} // namespace doris diff --git a/be/src/runtime/string_value.hpp b/be/src/runtime/string_value.hpp deleted file mode 100644 index fb8039e661..0000000000 --- a/be/src/runtime/string_value.hpp +++ /dev/null @@ -1,59 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/string-value.hpp -// and modified by Doris - -#pragma once - -#include - -#include "runtime/string_value.h" - -namespace doris { - -inline StringValue StringValue::substring(int start_pos) const { - return StringValue(ptr + start_pos, len - start_pos); -} - -inline StringValue StringValue::substring(int start_pos, int new_len) const { - return StringValue(ptr + start_pos, (new_len < 0) ? (len - start_pos) : new_len); -} - -inline StringValue StringValue::trim() const { - // Remove leading and trailing spaces. - int32_t begin = 0; - - while (begin < len && ptr[begin] == ' ') { - ++begin; - } - - int32_t end = len - 1; - - while (end > begin && ptr[end] == ' ') { - --end; - } - - return StringValue(ptr + begin, end - begin + 1); -} - -inline int64_t StringValue::find_first_of(char c) const { - const char* p = static_cast(memchr(ptr, c, len)); - return p == nullptr ? -1 : p - ptr; -} - -} // namespace doris diff --git a/be/src/runtime/tuple.cpp b/be/src/runtime/tuple.cpp index 641c13c4bb..b71a55b050 100644 --- a/be/src/runtime/tuple.cpp +++ b/be/src/runtime/tuple.cpp @@ -30,9 +30,9 @@ #include "runtime/descriptors.h" #include "runtime/mem_pool.h" #include "runtime/raw_value.h" -#include "runtime/string_value.h" #include "runtime/tuple_row.h" #include "util/mem_util.hpp" +#include "vec/common/string_ref.h" namespace doris { @@ -57,8 +57,8 @@ int64_t Tuple::varlen_byte_size(const TupleDescriptor& desc) const { if (is_null((*slot)->null_indicator_offset())) { continue; } - const StringValue* string_val = get_string_slot((*slot)->tuple_offset()); - result += string_val->len; + const StringRef* string_val = get_string_slot((*slot)->tuple_offset()); + result += string_val->size; } return result; @@ -76,17 +76,17 @@ void Tuple::deep_copy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool, bo // allocate in the same pool and then copy all non-null string slots for (auto string_slot : desc.string_slots()) { DCHECK(string_slot->type().is_string_type()); - StringValue* string_v = dst->get_string_slot(string_slot->tuple_offset()); + StringRef* string_v = dst->get_string_slot(string_slot->tuple_offset()); if (!dst->is_null(string_slot->null_indicator_offset())) { - if (string_v->len != 0) { + if (string_v->size != 0) { int64_t offset = pool->total_allocated_bytes(); - char* string_copy = (char*)(pool->allocate(string_v->len)); - memory_copy(string_copy, string_v->ptr, string_v->len); - string_v->ptr = (convert_ptrs ? convert_to(offset) : string_copy); + char* string_copy = (char*)(pool->allocate(string_v->size)); + memory_copy(string_copy, string_v->data, string_v->size); + string_v->data = (convert_ptrs ? convert_to(offset) : string_copy); } } else { - string_v->ptr = nullptr; - string_v->len = 0; + string_v->data = nullptr; + string_v->size = 0; } } @@ -135,14 +135,14 @@ int64_t Tuple::dcopy_with_new(Tuple* dst, const TupleDescriptor& desc) { DCHECK(slot->type().is_string_type()); if (!dst->is_null(slot->null_indicator_offset())) { - StringValue* string_v = dst->get_string_slot(slot->tuple_offset()); - bytes += string_v->len; - if (string_v->len != 0) { - char* string_copy = new char[string_v->len]; - memory_copy(string_copy, string_v->ptr, string_v->len); - string_v->ptr = string_copy; + StringRef* string_v = dst->get_string_slot(slot->tuple_offset()); + bytes += string_v->size; + if (string_v->size != 0) { + char* string_copy = new char[string_v->size]; + memory_copy(string_copy, string_v->data, string_v->size); + string_v->data = string_copy; } else { - string_v->ptr = nullptr; + string_v->data = nullptr; } } } @@ -153,10 +153,10 @@ int64_t Tuple::release_string(const TupleDescriptor& desc) { int64_t bytes = 0; for (auto slot : desc.string_slots()) { if (!is_null(slot->null_indicator_offset())) { - StringValue* string_v = get_string_slot(slot->tuple_offset()); - delete[] string_v->ptr; - string_v->ptr = nullptr; - bytes += string_v->len; + StringRef* string_v = get_string_slot(slot->tuple_offset()); + delete[] string_v->data; + string_v->data = nullptr; + bytes += string_v->size; } } return bytes; @@ -171,15 +171,15 @@ void Tuple::deep_copy(const TupleDescriptor& desc, char** data, int64_t* offset, for (auto slot_desc : desc.string_slots()) { DCHECK(slot_desc->type().is_string_type()); - StringValue* string_v = dst->get_string_slot(slot_desc->tuple_offset()); + StringRef* string_v = dst->get_string_slot(slot_desc->tuple_offset()); if (!dst->is_null(slot_desc->null_indicator_offset())) { - memory_copy(*data, string_v->ptr, string_v->len); - string_v->ptr = (convert_ptrs ? convert_to(*offset) : *data); - *data += string_v->len; - *offset += string_v->len; + memory_copy(*data, string_v->data, string_v->size); + string_v->data = (convert_ptrs ? convert_to(*offset) : *data); + *data += string_v->size; + *offset += string_v->size; } else { - string_v->ptr = (convert_ptrs ? convert_to(*offset) : *data); - string_v->len = 0; + string_v->data = (convert_ptrs ? convert_to(*offset) : *data); + string_v->size = 0; } } @@ -198,7 +198,7 @@ void Tuple::deep_copy(const TupleDescriptor& desc, char** data, int64_t* offset, template void Tuple::materialize_exprs(TupleRow* row, const TupleDescriptor& desc, const std::vector& materialize_expr_ctxs, MemPool* pool, - std::vector* non_null_var_len_values, + std::vector* non_null_var_len_values, int* total_var_len) { if (collect_string_vals) { non_null_var_len_values->clear(); @@ -235,9 +235,9 @@ void Tuple::materialize_exprs(TupleRow* row, const TupleDescriptor& desc, RawValue::write(src, dst, slot_desc->type(), pool); if (collect_string_vals) { if (slot_desc->type().is_string_type()) { - StringValue* string_val = convert_to(dst); + StringRef* string_val = convert_to(dst); non_null_var_len_values->push_back(string_val); - *total_var_len += string_val->len; + *total_var_len += string_val->size; } } } else { @@ -252,12 +252,12 @@ void Tuple::materialize_exprs(TupleRow* row, const TupleDescriptor& desc, template void Tuple::materialize_exprs( TupleRow* row, const TupleDescriptor& desc, const std::vector& materialize_expr_ctxs, MemPool* pool, - std::vector* non_null_var_values, int* total_var_len); + std::vector* non_null_var_values, int* total_var_len); template void Tuple::materialize_exprs(TupleRow* row, const TupleDescriptor& desc, const std::vector& materialize_expr_ctxs, MemPool* pool, - std::vector* non_null_var_values, + std::vector* non_null_var_values, int* total_var_len); std::string Tuple::to_string(const TupleDescriptor& d) const { diff --git a/be/src/runtime/tuple.h b/be/src/runtime/tuple.h index 821f9b3eac..e19d8cc68c 100644 --- a/be/src/runtime/tuple.h +++ b/be/src/runtime/tuple.h @@ -28,7 +28,7 @@ namespace doris { -struct StringValue; +struct StringRef; class CollectionValue; class TupleDescriptor; class DateTimeValue; @@ -115,7 +115,7 @@ public: template void materialize_exprs(TupleRow* row, const TupleDescriptor& desc, const std::vector& materialize_expr_ctxs, MemPool* pool, - std::vector* non_null_var_len_values, int* total_var_len); + std::vector* non_null_var_len_values, int* total_var_len); // Turn null indicator bit on. // Turn null indicator bit on. For non-nullable slots, the mask will be 0 and @@ -147,14 +147,14 @@ public: return &_data[offset]; } - StringValue* get_string_slot(int offset) { + StringRef* get_string_slot(int offset) { DCHECK(offset != -1); // -1 offset indicates non-materialized slot - return reinterpret_cast(&_data[offset]); + return reinterpret_cast(&_data[offset]); } - const StringValue* get_string_slot(int offset) const { + const StringRef* get_string_slot(int offset) const { DCHECK(offset != -1); // -1 offset indicates non-materialized slot - return reinterpret_cast(&_data[offset]); + return reinterpret_cast(&_data[offset]); } CollectionValue* get_collection_slot(int offset) { diff --git a/be/src/runtime/type_limit.h b/be/src/runtime/type_limit.h index 923f2891e9..378c27e9e1 100644 --- a/be/src/runtime/type_limit.h +++ b/be/src/runtime/type_limit.h @@ -19,7 +19,7 @@ #include "runtime/datetime_value.h" #include "runtime/decimalv2_value.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { @@ -30,9 +30,9 @@ struct type_limit { }; template <> -struct type_limit { - static StringValue min() { return StringValue::min_string_val(); } - static StringValue max() { return StringValue::max_string_val(); } +struct type_limit { + static StringRef min() { return StringRef::min_string_val(); } + static StringRef max() { return StringRef::max_string_val(); } }; template <> diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h index 49758f40d1..734f6eab1e 100644 --- a/be/src/udf/udf.h +++ b/be/src/udf/udf.h @@ -32,7 +32,7 @@ namespace doris { class FunctionContextImpl; struct ColumnPtrWrapper; -struct StringValue; +struct StringRef; class BitmapValue; class DecimalV2Value; class DateTimeValue; @@ -303,7 +303,8 @@ private: // the same lifetime as results for the UDF. In other words, the UDF can return // memory from input arguments without making copies. For example, a function like // substring will not need to allocate and copy the smaller string. For cases where -// the UDF needs a buffer, it should use the StringValue(FunctionContext, len) c'tor. +// the UDF needs a buffer, it should use the StringRef(FunctionContext, len) c'tor. +// TODO: things above is not right. StringRef shouldn't use here. // // The UDF can optionally specify a Prepare function. The prepare function is called // once before any calls to the Udf to evaluate values. This is the appropriate time for @@ -743,6 +744,7 @@ struct DateTimeV2Val : public AnyVal { bool operator!=(const DateTimeV2Val& other) const { return !(*this == other); } }; +// TODO: need to set explicit align? // Note: there is a difference between a nullptr string (is_null == true) and an // empty string (len == 0). struct StringVal : public AnyVal { diff --git a/be/src/util/bitmap_intersect.h b/be/src/util/bitmap_intersect.h index 94ff8dc283..bc4a8c6923 100644 --- a/be/src/util/bitmap_intersect.h +++ b/be/src/util/bitmap_intersect.h @@ -17,9 +17,9 @@ #pragma once #include -#include "runtime/string_value.h" #include "udf/udf.h" #include "util/bitmap_value.h" +#include "vec/common/string_ref.h" namespace doris { @@ -62,9 +62,9 @@ public: }; template <> -inline StringValue Helper::get_val(const StringVal& x) { +inline StringRef Helper::get_val(const StringVal& x) { DCHECK(!x.is_null); - return StringValue::from_string_val(x); + return StringRef(x); } template <> @@ -98,11 +98,11 @@ inline char* Helper::write_to(const DecimalV2Value& v, char* des } template <> -inline char* Helper::write_to(const StringValue& v, char* dest) { - *(int32_t*)dest = v.len; +inline char* Helper::write_to(const StringRef& v, char* dest) { + *(int32_t*)dest = v.size; dest += 4; - memcpy(dest, v.ptr, v.len); - dest += v.len; + memcpy(dest, v.data, v.size); + dest += v.size; return dest; } @@ -127,8 +127,8 @@ inline int32_t Helper::serialize_size(const DecimalV2Value& v) { } template <> -inline int32_t Helper::serialize_size(const StringValue& v) { - return v.len + 4; +inline int32_t Helper::serialize_size(const StringRef& v) { + return v.size + 4; } template <> @@ -157,10 +157,10 @@ inline void Helper::read_from(const char** src, DecimalV2Value* } template <> -inline void Helper::read_from(const char** src, StringValue* result) { +inline void Helper::read_from(const char** src, StringRef* result) { int32_t length = *(int32_t*)(*src); *src += 4; - *result = StringValue((char*)*src, length); + *result = StringRef((char*)*src, length); *src += length; } diff --git a/be/src/util/simd/vstring_function.h b/be/src/util/simd/vstring_function.h index 3c1a4e7f32..d716e8e573 100644 --- a/be/src/util/simd/vstring_function.h +++ b/be/src/util/simd/vstring_function.h @@ -27,8 +27,8 @@ #include #endif -#include "runtime/string_value.hpp" #include "util/simd/lower_upper_impl.h" +#include "vec/common/string_ref.h" namespace doris { diff --git a/be/src/util/static_asserts.cpp b/be/src/util/static_asserts.cpp index b578646f9b..b740349593 100644 --- a/be/src/util/static_asserts.cpp +++ b/be/src/util/static_asserts.cpp @@ -19,16 +19,16 @@ // and modified by Doris #include "runtime/datetime_value.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" #include "vec/runtime/vdatetime_value.h" namespace doris { // This class is unused. It contains static (compile time) asserts. // This is useful to validate struct sizes and other similar things -// at compile time. If these asserts fail, the compile will fail. +// at compile time. If these assertions fail, the compiling will fail. class UnusedClass { private: - static_assert(sizeof(StringValue) == 16); - static_assert(offsetof(StringValue, len) == 8); + static_assert(sizeof(StringRef) == 16); + static_assert(offsetof(StringRef, size) == 8); // Datetime value static_assert(sizeof(DateTimeValue) == 16); static_assert(sizeof(doris::vectorized::VecDateTimeValue) == 8); diff --git a/be/src/util/url_parser.cpp b/be/src/util/url_parser.cpp index 06ed454251..e54281eb2c 100644 --- a/be/src/util/url_parser.cpp +++ b/be/src/util/url_parser.cpp @@ -19,25 +19,25 @@ #include -#include "runtime/string_value.hpp" +#include "vec/common/string_ref.h" namespace doris { -const StringValue UrlParser::_s_url_authority(const_cast("AUTHORITY"), 9); -const StringValue UrlParser::_s_url_file(const_cast("FILE"), 4); -const StringValue UrlParser::_s_url_host(const_cast("HOST"), 4); -const StringValue UrlParser::_s_url_path(const_cast("PATH"), 4); -const StringValue UrlParser::_s_url_protocol(const_cast("PROTOCOL"), 8); -const StringValue UrlParser::_s_url_query(const_cast("QUERY"), 5); -const StringValue UrlParser::_s_url_ref(const_cast("REF"), 3); -const StringValue UrlParser::_s_url_userinfo(const_cast("USERINFO"), 8); -const StringValue UrlParser::_s_url_port(const_cast("PORT"), 4); -const StringValue UrlParser::_s_protocol(const_cast("://"), 3); -const StringValue UrlParser::_s_at(const_cast("@"), 1); -const StringValue UrlParser::_s_slash(const_cast("/"), 1); -const StringValue UrlParser::_s_colon(const_cast(":"), 1); -const StringValue UrlParser::_s_question(const_cast("?"), 1); -const StringValue UrlParser::_s_hash(const_cast("#"), 1); +const StringRef UrlParser::_s_url_authority(const_cast("AUTHORITY"), 9); +const StringRef UrlParser::_s_url_file(const_cast("FILE"), 4); +const StringRef UrlParser::_s_url_host(const_cast("HOST"), 4); +const StringRef UrlParser::_s_url_path(const_cast("PATH"), 4); +const StringRef UrlParser::_s_url_protocol(const_cast("PROTOCOL"), 8); +const StringRef UrlParser::_s_url_query(const_cast("QUERY"), 5); +const StringRef UrlParser::_s_url_ref(const_cast("REF"), 3); +const StringRef UrlParser::_s_url_userinfo(const_cast("USERINFO"), 8); +const StringRef UrlParser::_s_url_port(const_cast("PORT"), 4); +const StringRef UrlParser::_s_protocol(const_cast("://"), 3); +const StringRef UrlParser::_s_at(const_cast("@"), 1); +const StringRef UrlParser::_s_slash(const_cast("/"), 1); +const StringRef UrlParser::_s_colon(const_cast(":"), 1); +const StringRef UrlParser::_s_question(const_cast("?"), 1); +const StringRef UrlParser::_s_hash(const_cast("#"), 1); const StringSearch UrlParser::_s_protocol_search(&_s_protocol); const StringSearch UrlParser::_s_at_search(&_s_at); const StringSearch UrlParser::_s_slash_search(&_s_slash); @@ -45,11 +45,11 @@ const StringSearch UrlParser::_s_colon_search(&_s_colon); const StringSearch UrlParser::_s_question_search(&_s_question); const StringSearch UrlParser::_s_hash_search(&_s_hash); -bool UrlParser::parse_url(const StringValue& url, UrlPart part, StringValue* result) { - result->ptr = nullptr; - result->len = 0; +bool UrlParser::parse_url(const StringRef& url, UrlPart part, StringRef* result) { + result->data = nullptr; + result->size = 0; // Remove leading and trailing spaces. - StringValue trimmed_url = url.trim(); + StringRef trimmed_url = url.trim(); // All parts require checking for the _s_protocol. int32_t protocol_pos = _s_protocol_search.search(&trimmed_url); @@ -58,7 +58,7 @@ bool UrlParser::parse_url(const StringValue& url, UrlPart part, StringValue* res } // Positioned to first char after '://'. - StringValue protocol_end = trimmed_url.substring(protocol_pos + _s_protocol.len); + StringRef protocol_end = trimmed_url.substring(protocol_pos + _s_protocol.size); switch (part) { case AUTHORITY: { @@ -78,7 +78,7 @@ bool UrlParser::parse_url(const StringValue& url, UrlPart part, StringValue* res return true; } - StringValue path_start = protocol_end.substring(start_pos); + StringRef path_start = protocol_end.substring(start_pos); int32_t end_pos; if (part == FILE) { @@ -107,10 +107,10 @@ bool UrlParser::parse_url(const StringValue& url, UrlPart part, StringValue* res start_pos = 0; } else { // Skip '@'. - start_pos += _s_at.len; + start_pos += _s_at.size; } - StringValue host_start = protocol_end.substring(start_pos); + StringRef host_start = protocol_end.substring(start_pos); // Find ':' to strip out port. int32_t end_pos = _s_colon_search.search(&host_start); @@ -137,7 +137,7 @@ bool UrlParser::parse_url(const StringValue& url, UrlPart part, StringValue* res return false; } - StringValue query_start = protocol_end.substring(start_pos + _s_question.len); + StringRef query_start = protocol_end.substring(start_pos + _s_question.size); // End string _s_at next '#'. int32_t end_pos = _s_hash_search.search(&query_start); *result = query_start.substring(0, end_pos); @@ -153,7 +153,7 @@ bool UrlParser::parse_url(const StringValue& url, UrlPart part, StringValue* res return false; } - *result = protocol_end.substring(start_pos + _s_hash.len); + *result = protocol_end.substring(start_pos + _s_hash.size); break; } @@ -179,10 +179,10 @@ bool UrlParser::parse_url(const StringValue& url, UrlPart part, StringValue* res start_pos = 0; } else { // Skip '@'. - start_pos += _s_at.len; + start_pos += _s_at.size; } - StringValue host_start = protocol_end.substring(start_pos); + StringRef host_start = protocol_end.substring(start_pos); // Find ':' to strip out port. int32_t end_pos = _s_colon_search.search(&host_start); //no port found @@ -190,7 +190,7 @@ bool UrlParser::parse_url(const StringValue& url, UrlPart part, StringValue* res return false; } - StringValue port_start_str = protocol_end.substring(end_pos + _s_colon.len); + StringRef port_start_str = protocol_end.substring(end_pos + _s_colon.size); int32_t port_end_pos = _s_slash_search.search(&port_start_str); //if '/' not found, try to find '?' if (port_end_pos < 0) { @@ -207,20 +207,20 @@ bool UrlParser::parse_url(const StringValue& url, UrlPart part, StringValue* res return true; } -bool UrlParser::parse_url_key(const StringValue& url, UrlPart part, const StringValue& key, - StringValue* result) { +bool UrlParser::parse_url_key(const StringRef& url, UrlPart part, const StringRef& key, + StringRef* result) { // Part must be query to ask for a specific query key. if (part != QUERY) { return false; } // Remove leading and trailing spaces. - StringValue trimmed_url = url.trim(); + StringRef trimmed_url = url.trim(); // Search for the key in the url, ignoring malformed URLs for now. StringSearch key_search(&key); - while (trimmed_url.len > 0) { + while (trimmed_url.size > 0) { // Search for the key in the current substring. int32_t key_pos = key_search.search(&trimmed_url); bool match = true; @@ -232,31 +232,31 @@ bool UrlParser::parse_url_key(const StringValue& url, UrlPart part, const String // Key pos must be != 0 because it must be preceded by a '?' or a '&'. // Check that the char before key_pos is either '?' or '&'. if (key_pos == 0 || - (trimmed_url.ptr[key_pos - 1] != '?' && trimmed_url.ptr[key_pos - 1] != '&')) { + (trimmed_url.data[key_pos - 1] != '?' && trimmed_url.data[key_pos - 1] != '&')) { match = false; } // Advance substring beyond matching key. - trimmed_url = trimmed_url.substring(key_pos + key.len); + trimmed_url = trimmed_url.substring(key_pos + key.size); if (!match) { continue; } - if (trimmed_url.len <= 0) { + if (trimmed_url.size <= 0) { break; } // Next character must be '=', otherwise the match cannot be a key in the query part. - if (trimmed_url.ptr[0] != '=') { + if (trimmed_url.data[0] != '=') { continue; } int32_t pos = 1; // Find ending position of key's value by matching '#' or '&'. - while (pos < trimmed_url.len) { - switch (trimmed_url.ptr[pos]) { + while (pos < trimmed_url.size) { + switch (trimmed_url.data[pos]) { case '#': case '&': *result = trimmed_url.substring(1, pos - 1); @@ -274,13 +274,13 @@ bool UrlParser::parse_url_key(const StringValue& url, UrlPart part, const String return false; } -UrlParser::UrlPart UrlParser::get_url_part(const StringValue& part) { +UrlParser::UrlPart UrlParser::get_url_part(const StringRef& part) { // Quick filter on requested URL part, based on first character. // Hive requires the requested URL part to be all upper case. std::string part_str = part.to_string(); transform(part_str.begin(), part_str.end(), part_str.begin(), ::toupper); - StringValue newPart = StringValue(part_str); - switch (newPart.ptr[0]) { + StringRef newPart = StringRef(part_str); + switch (newPart.data[0]) { case 'A': { if (!newPart.eq(_s_url_authority)) { return INVALID; @@ -346,10 +346,10 @@ UrlParser::UrlPart UrlParser::get_url_part(const StringValue& part) { } } -StringValue UrlParser::extract_url(StringValue url, StringValue name) { - StringValue result("", 0); +StringRef UrlParser::extract_url(StringRef url, StringRef name) { + StringRef result("", 0); // Remove leading and trailing spaces. - StringValue trimmed_url = url.trim(); + StringRef trimmed_url = url.trim(); // find '?' int32_t question_pos = _s_question_search.search(&trimmed_url); if (question_pos < 0) { @@ -360,9 +360,9 @@ StringValue UrlParser::extract_url(StringValue url, StringValue name) { // find '#' int32_t hash_pos = _s_hash_search.search(&trimmed_url); - StringValue sub_url; + StringRef sub_url; if (hash_pos < 0) { - sub_url = trimmed_url.substring(question_pos + 1, trimmed_url.len - question_pos - 1); + sub_url = trimmed_url.substring(question_pos + 1, trimmed_url.size - question_pos - 1); } else { sub_url = trimmed_url.substring(question_pos + 1, hash_pos - question_pos - 1); } @@ -370,8 +370,8 @@ StringValue UrlParser::extract_url(StringValue url, StringValue name) { // find '&' and '=', and extract target parameter // Example: k1=aa&k2=bb&k3=cc&test=dd int64_t and_pod; - auto len = sub_url.len; - StringValue key_url; + auto len = sub_url.size; + StringRef key_url; while (true) { if (len <= 0) { break; @@ -385,14 +385,14 @@ StringValue UrlParser::extract_url(StringValue url, StringValue name) { key_url = end_pos == -1 ? sub_url : sub_url.substring(0, end_pos); sub_url = result; } - len = sub_url.len; + len = sub_url.size; auto eq_pod = key_url.find_first_of('='); if (eq_pod == -1) { // invalid url. like: k1&k2=bb continue; } - int32_t key_len = key_url.len; + int32_t key_len = key_url.size; auto key = key_url.substring(0, eq_pod); if (name == key) { return key_url.substring(eq_pod + 1, key_len - eq_pod - 1); diff --git a/be/src/util/url_parser.h b/be/src/util/url_parser.h index e2a7ca6872..882ae251ff 100644 --- a/be/src/util/url_parser.h +++ b/be/src/util/url_parser.h @@ -18,7 +18,7 @@ #pragma once #include "runtime/string_search.hpp" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { @@ -47,42 +47,42 @@ public: // Tries to parse the part from url. Places the result in result. // Returns false if the URL is malformed or if part is invalid. True otherwise. // If false is returned the contents of results are undefined. - static bool parse_url(const StringValue& url, UrlPart part, StringValue* result); + static bool parse_url(const StringRef& url, UrlPart part, StringRef* result); // Tries to parse key from url. Places the result in result. // Returns false if the URL is malformed or if part is invalid. True otherwise. // If false is returned the contents of results are undefined. - static bool parse_url_key(const StringValue& url, UrlPart part, const StringValue& key, - StringValue* result); + static bool parse_url_key(const StringRef& url, UrlPart part, const StringRef& key, + StringRef* result); // Compares part against url_authority, url_file, url_host, etc., // and returns the corresponding enum. // If part did not match any of the url part constants, returns INVALID. - static UrlPart get_url_part(const StringValue& part); + static UrlPart get_url_part(const StringRef& part); // Extract parameter value from url // Example for url: // http://doris.apache.org?k1=aa&k2=bb&k3=cc&test=dd#999 - static StringValue extract_url(StringValue url, StringValue name); + static StringRef extract_url(StringRef url, StringRef name); private: // Constants representing parts of a URL. - static const StringValue _s_url_authority; - static const StringValue _s_url_file; - static const StringValue _s_url_host; - static const StringValue _s_url_path; - static const StringValue _s_url_protocol; - static const StringValue _s_url_query; - static const StringValue _s_url_ref; - static const StringValue _s_url_userinfo; - static const StringValue _s_url_port; + static const StringRef _s_url_authority; + static const StringRef _s_url_file; + static const StringRef _s_url_host; + static const StringRef _s_url_path; + static const StringRef _s_url_protocol; + static const StringRef _s_url_query; + static const StringRef _s_url_ref; + static const StringRef _s_url_userinfo; + static const StringRef _s_url_port; // Constants used in searching for URL parts. - static const StringValue _s_protocol; - static const StringValue _s_at; - static const StringValue _s_slash; - static const StringValue _s_colon; - static const StringValue _s_question; - static const StringValue _s_hash; + static const StringRef _s_protocol; + static const StringRef _s_at; + static const StringRef _s_slash; + static const StringRef _s_colon; + static const StringRef _s_question; + static const StringRef _s_hash; static const StringSearch _s_protocol_search; static const StringSearch _s_at_search; static const StringSearch _s_slash_search; diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 4c42d341c9..b2c6e36c5c 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -59,6 +59,7 @@ set(VEC_FILES common/exception.cpp common/mremap.cpp common/pod_array.cpp + common/string_ref.cpp common/sort/heap_sorter.cpp common/sort/sorter.cpp common/sort/topn_sorter.cpp diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp index 4194befae2..59aeec80f4 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp @@ -34,7 +34,7 @@ AggregateFunctionPtr create_aggregate_function_orthogonal(const std::string& nam LOG(WARNING) << "Incorrect number of arguments for aggregate function " << name; return nullptr; } else if (argument_types.size() == 1) { - return std::make_shared>>(argument_types); + return std::make_shared>>(argument_types); } else { const IDataType& argument_type = *argument_types[1]; AggregateFunctionPtr res(create_with_numeric_type( diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 95cd848763..354fb78e2e 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -21,10 +21,10 @@ #include -#include "runtime/string_value.h" #include "vec/columns/column.h" #include "vec/columns/column_string.h" #include "vec/columns/predicate_column.h" +#include "vec/common/string_ref.h" #include "vec/core/types.h" namespace doris::vectorized { @@ -57,7 +57,7 @@ public: using Self = ColumnDictionary; using value_type = T; using Container = PaddedPODArray; - using DictContainer = PaddedPODArray; + using DictContainer = PaddedPODArray; using HashValueContainer = PaddedPODArray; // used for bloom filter bool is_column_dictionary() const override { return true; } @@ -193,9 +193,9 @@ public: size_t length = 0; for (size_t i = 0; i != sel_size; ++i) { auto& value = _dict.get_value(_codes[sel[i]]); - strings[i].data = value.ptr; - strings[i].size = value.len; - length += value.len; + strings[i].data = value.data; + strings[i].size = value.size; + length += value.size; } res_col->get_offsets().reserve(sel_size + res_col->get_offsets().size()); res_col->get_chars().reserve(length + res_col->get_chars().size()); @@ -217,7 +217,7 @@ public: void insert_many_dict_data(const StringRef* dict_array, uint32_t dict_num) { _dict.reserve(_dict.size() + dict_num); for (uint32_t i = 0; i < dict_num; ++i) { - auto value = StringValue(dict_array[i].data, dict_array[i].size); + auto value = StringRef(dict_array[i].data, dict_array[i].size); _dict.insert_value(value); } } @@ -228,7 +228,7 @@ public: if (_dict.empty()) { _dict.reserve(dict_num); for (uint32_t i = 0; i < dict_num; ++i) { - auto value = StringValue(dict_array[i].data, dict_array[i].size); + auto value = StringRef(dict_array[i].data, dict_array[i].size); _dict.insert_value(value); } } @@ -259,9 +259,9 @@ public: } } - int32_t find_code(const StringValue& value) const { return _dict.find_code(value); } + int32_t find_code(const StringRef& value) const { return _dict.find_code(value); } - int32_t find_code_by_bound(const StringValue& value, bool greater, bool eq) const { + int32_t find_code_by_bound(const StringRef& value, bool greater, bool eq) const { return _dict.find_code_by_bound(value, greater, eq); } @@ -271,7 +271,7 @@ public: uint32_t get_hash_value(uint32_t idx) const { return _dict.get_hash_value(_codes[idx]); } - void find_codes(const phmap::flat_hash_set& values, + void find_codes(const phmap::flat_hash_set& values, std::vector& selected) const { return _dict.find_codes(values, selected); } @@ -297,19 +297,19 @@ public: for (size_t i = 0; i < _codes.size(); ++i) { auto& code = reinterpret_cast(_codes[i]); auto value = _dict.get_value(code); - res->insert_data(value.ptr, value.len); + res->insert_data(value.data, value.size); } clear(); _dict.clear(); return res; } - inline const StringValue& get_value(value_type code) const { return _dict.get_value(code); } + inline const StringRef& get_value(value_type code) const { return _dict.get_value(code); } - inline StringValue get_shrink_value(value_type code) const { - StringValue result = _dict.get_value(code); + inline StringRef get_shrink_value(value_type code) const { + StringRef result = _dict.get_value(code); if (_type == OLAP_FIELD_TYPE_CHAR) { - result.len = strnlen(result.ptr, result.len); + result.size = strnlen(result.data, result.size); } return result; } @@ -324,12 +324,12 @@ public: void reserve(size_t n) { _dict_data->reserve(n); } - void insert_value(StringValue& value) { + void insert_value(StringRef& value) { _dict_data->push_back_without_reserve(value); - _total_str_len += value.len; + _total_str_len += value.size; } - int32_t find_code(const StringValue& value) const { + int32_t find_code(const StringRef& value) const { for (size_t i = 0; i < _dict_data->size(); i++) { if ((*_dict_data)[i] == value) { return i; @@ -340,9 +340,9 @@ public: T get_null_code() const { return -1; } - inline StringValue& get_value(T code) { return (*_dict_data)[code]; } + inline StringRef& get_value(T code) { return (*_dict_data)[code]; } - inline const StringValue& get_value(T code) const { return (*_dict_data)[code]; } + inline const StringRef& get_value(T code) const { return (*_dict_data)[code]; } // The function is only used in the runtime filter feature inline void generate_hash_values_for_runtime_filter(FieldType type) { @@ -357,13 +357,13 @@ public: // Remove the suffix 0 // When writing data, use the CharField::consume function to fill in the trailing 0. - // For dictionary data of char type, sv.len is the schema length, + // For dictionary data of char type, sv.size is the schema length, // so use strnlen to remove the 0 at the end to get the actual length. - int32_t len = sv.len; + int32_t len = sv.size; if (type == OLAP_FIELD_TYPE_CHAR) { - len = strnlen(sv.ptr, sv.len); + len = strnlen(sv.data, sv.size); } - uint32_t hash_val = HashUtil::murmur_hash3_32(sv.ptr, len, 0); + uint32_t hash_val = HashUtil::murmur_hash3_32(sv.data, len, 0); _hash_values[i] = hash_val; } } @@ -388,7 +388,7 @@ public: // so upper_bound is the code 0 of b, then evaluate code < 0 and returns empty // If the predicate is col <= 'a' and upper_bound-1 is -1, // then evaluate code <= -1 and returns empty - int32_t find_code_by_bound(const StringValue& value, bool greater, bool eq) const { + int32_t find_code_by_bound(const StringRef& value, bool greater, bool eq) const { auto code = find_code(value); if (code >= 0) { return code; @@ -398,7 +398,7 @@ public: return greater ? bound - greater + eq : bound - eq; } - void find_codes(const phmap::flat_hash_set& values, + void find_codes(const phmap::flat_hash_set& values, std::vector& selected) const { size_t dict_word_num = _dict_data->size(); selected.resize(dict_word_num); @@ -476,8 +476,8 @@ public: } private: - StringValue _null_value = StringValue(); - StringValue::Comparator _comparator; + StringRef _null_value = StringRef(); + StringRef::Comparator _comparator; // dict code -> dict value std::unique_ptr _dict_data; std::vector _code_convert_table; diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h index f307d12473..e2a464b5ea 100644 --- a/be/src/vec/columns/predicate_column.h +++ b/be/src/vec/columns/predicate_column.h @@ -21,11 +21,11 @@ #include "olap/uint24.h" #include "runtime/mem_pool.h" #include "runtime/primitive_type.h" -#include "runtime/string_value.h" #include "vec/columns/column.h" #include "vec/columns/column_decimal.h" #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" +#include "vec/common/string_ref.h" #include "vec/core/types.h" namespace doris::vectorized { @@ -95,10 +95,10 @@ private: size_t length = 0; for (size_t i = 0; i < sel_size; i++) { uint16_t n = sel[i]; - auto& sv = reinterpret_cast(data[n]); - refs[i].data = sv.ptr; - refs[i].size = sv.len; - length += sv.len; + auto& sv = reinterpret_cast(data[n]); + refs[i].data = sv.data; + refs[i].size = sv.size; + length += sv.size; } res_ptr->get_offsets().reserve(sel_size + res_ptr->get_offsets().size()); res_ptr->get_chars().reserve(length + res_ptr->get_chars().size()); @@ -187,7 +187,7 @@ public: } void insert_string_value(const char* data_ptr, size_t length) { - StringValue sv((char*)data_ptr, length); + StringRef sv((char*)data_ptr, length); data.push_back_without_reserve(sv); } @@ -211,7 +211,7 @@ public: } void insert_data(const char* data_ptr, size_t length) override { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { insert_string_value(data_ptr, length); } else if constexpr (std::is_same_v) { insert_decimal_value(data_ptr, length); @@ -242,7 +242,7 @@ public: insert_many_in_copy_way(data_ptr, num); } else if constexpr (std::is_same_v) { insert_many_in_copy_way(data_ptr, num); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { // here is unreachable, just for compilation to be able to pass } else if constexpr (Type == TYPE_DATE) { insert_many_date(data_ptr, num); @@ -253,7 +253,7 @@ public: void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict, size_t num, uint32_t /*dict_num*/) override { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { for (size_t end_index = start_index + num; start_index < end_index; ++start_index) { int32_t codeword = data_array[start_index]; insert_string_value(dict[codeword].data, dict[codeword].size); @@ -266,7 +266,7 @@ public: if (UNLIKELY(num == 0)) { return; } - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { if (_pool == nullptr) { _pool.reset(new MemPool()); } @@ -278,10 +278,10 @@ public: auto* data_ptr = &data[org_elem_num]; for (size_t i = 0; i != num; ++i) { - data_ptr[i].ptr = destination + offsets[i] - offsets[0]; - data_ptr[i].len = offsets[i + 1] - offsets[i]; + data_ptr[i].data = destination + offsets[i] - offsets[0]; + data_ptr[i].size = offsets[i + 1] - offsets[i]; } - DCHECK(data_ptr[num - 1].ptr + data_ptr[num - 1].len == destination + total_mem_size); + DCHECK(data_ptr[num - 1].data + data_ptr[num - 1].size == destination + total_mem_size); } } @@ -290,7 +290,7 @@ public: if (num == 0) { return; } - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { if (_pool == nullptr) { _pool.reset(new MemPool()); } @@ -307,8 +307,8 @@ public: uint32_t fragment_start_offset = start_offset_array[0]; size_t fragment_len = 0; for (size_t i = 0; i < num; i++) { - data[org_elem_num + i].ptr = destination + fragment_len; - data[org_elem_num + i].len = len_array[i]; + data[org_elem_num + i].data = destination + fragment_len; + data[org_elem_num + i].size = len_array[i]; fragment_len += len_array[i]; // Compute the largest continuous memcpy block and copy them. // If this is the last element in data array, then should copy the current memory block. @@ -447,7 +447,7 @@ public: } Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) override { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { insert_string_to_res_column(sel, sel_size, reinterpret_cast(col_ptr)); } else if constexpr (std::is_same_v) { diff --git a/be/src/vec/common/hash_table/hash.h b/be/src/vec/common/hash_table/hash.h index 6d63a16735..28288a4813 100644 --- a/be/src/vec/common/hash_table/hash.h +++ b/be/src/vec/common/hash_table/hash.h @@ -23,6 +23,7 @@ #include #include "parallel_hashmap/phmap_utils.h" +#include "vec/common/string_ref.h" #include "vec/common/uint128.h" #include "vec/core/types.h" @@ -100,7 +101,7 @@ struct DefaultHash { }; template <> -struct DefaultHash : public StringRefHash {}; +struct DefaultHash : public doris::StringRefHash {}; template struct HashCRC32; diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h index 337d3ed8f9..41e7a4b446 100644 --- a/be/src/vec/common/hash_table/hash_table.h +++ b/be/src/vec/common/hash_table/hash_table.h @@ -163,7 +163,7 @@ struct HashTableCell { Key key; - HashTableCell() {} + HashTableCell() = default; /// Create a cell with the given key / key and value. HashTableCell(const Key& key_, const State&) : key(key_) {} diff --git a/be/src/vec/common/hash_table/hash_table_key_holder.h b/be/src/vec/common/hash_table/hash_table_key_holder.h index eb507d16a5..a422ad7eef 100644 --- a/be/src/vec/common/hash_table/hash_table_key_holder.h +++ b/be/src/vec/common/hash_table/hash_table_key_holder.h @@ -99,7 +99,8 @@ struct ArenaKeyHolder { } // namespace doris::vectorized -inline StringRef& ALWAYS_INLINE key_holder_get_key(doris::vectorized::ArenaKeyHolder& holder) { +inline doris::StringRef& ALWAYS_INLINE +key_holder_get_key(doris::vectorized::ArenaKeyHolder& holder) { return holder.key; } @@ -125,7 +126,8 @@ struct SerializedKeyHolder { } // namespace doris::vectorized -inline StringRef& ALWAYS_INLINE key_holder_get_key(doris::vectorized::SerializedKeyHolder& holder) { +inline doris::StringRef& ALWAYS_INLINE +key_holder_get_key(doris::vectorized::SerializedKeyHolder& holder) { return holder.key; } diff --git a/be/src/vec/common/hash_table/string_hash_map.h b/be/src/vec/common/hash_table/string_hash_map.h index 344f468dec..99624cab42 100644 --- a/be/src/vec/common/hash_table/string_hash_map.h +++ b/be/src/vec/common/hash_table/string_hash_map.h @@ -30,7 +30,7 @@ struct StringHashMapCell : public HashMapCellvalue.first); } /// NOLINT + const doris::StringRef get_key() const { return to_string_ref(this->value.first); } /// NOLINT // internal static const Key& get_key(const value_type& value_) { return value_.first; } }; @@ -50,7 +50,7 @@ struct StringHashMapCell void set_zero() { this->value.first.high = 0; } // external - const StringRef get_key() const { return to_string_ref(this->value.first); } /// NOLINT + const doris::StringRef get_key() const { return to_string_ref(this->value.first); } /// NOLINT // internal static const StringKey16& get_key(const value_type& value_) { return value_.first; } }; @@ -70,24 +70,24 @@ struct StringHashMapCell void set_zero() { this->value.first.c = 0; } // external - const StringRef get_key() const { return to_string_ref(this->value.first); } /// NOLINT + const doris::StringRef get_key() const { return to_string_ref(this->value.first); } /// NOLINT // internal static const StringKey24& get_key(const value_type& value_) { return value_.first; } }; template -struct StringHashMapCell - : public HashMapCellWithSavedHash + : public HashMapCellWithSavedHash { - using Base = - HashMapCellWithSavedHash; + using Base = HashMapCellWithSavedHash; using value_type = typename Base::value_type; using Base::Base; static constexpr bool need_zero_value_storage = false; // external using Base::get_key; // internal - static const StringRef& get_key(const value_type& value_) { return value_.first; } + static const doris::StringRef& get_key(const value_type& value_) { return value_.first; } template StringHashMapCell(const StringHashMapCell& other) { @@ -107,21 +107,21 @@ struct StringHashMapCell template struct StringHashMapSubMaps { - using T0 = StringHashTableEmpty>; + using T0 = StringHashTableEmpty>; using T1 = HashMapTable, StringHashTableHash, StringHashTableGrower<>, Allocator>; using T2 = HashMapTable, StringHashTableHash, StringHashTableGrower<>, Allocator>; using T3 = HashMapTable, StringHashTableHash, StringHashTableGrower<>, Allocator>; - using Ts = HashMapTable, StringHashTableHash, - StringHashTableGrower<>, Allocator>; + using Ts = HashMapTable, + StringHashTableHash, StringHashTableGrower<>, Allocator>; }; template class StringHashMap : public StringHashTable> { public: - using Key = StringRef; + using Key = doris::StringRef; using Base = StringHashTable>; using Self = StringHashMap; using LookupResult = typename Base::LookupResult; @@ -177,7 +177,7 @@ public: template void ALWAYS_INLINE for_each_value(Func&& func) { if (this->m0.size()) { - func(StringRef {}, this->m0.zero_value()->get_second()); + func(doris::StringRef {}, this->m0.zero_value()->get_second()); } for (auto& v : this->m1) { diff --git a/be/src/vec/common/hash_table/string_hash_table.h b/be/src/vec/common/hash_table/string_hash_table.h index ad164b1b8f..00d76c4a24 100644 --- a/be/src/vec/common/hash_table/string_hash_table.h +++ b/be/src/vec/common/hash_table/string_hash_table.h @@ -35,15 +35,15 @@ struct StringKey24 { bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } }; -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { assert(n != 0); return {reinterpret_cast(&n), 8ul - (__builtin_clzll(n) >> 3)}; } -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) { assert(n.high != 0); return {reinterpret_cast(&n), 16ul - (__builtin_clzll(n.high) >> 3)}; } -inline StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) { +inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) { assert(n.c != 0); return {reinterpret_cast(&n), 24ul - (__builtin_clzll(n.c) >> 3)}; } @@ -79,7 +79,9 @@ struct StringHashTableHash { return util_hash::CityHash64(reinterpret_cast(&key), 24); } #endif - size_t ALWAYS_INLINE operator()(StringRef key) const { return StringRefHash()(key); } + size_t ALWAYS_INLINE operator()(doris::StringRef key) const { + return doris::StringRefHash()(key); + } }; template @@ -107,7 +109,9 @@ public: void clear_has_zero() { _has_zero = false; - if (!std::is_trivially_destructible_v) zero_value()->~Cell(); + if (!std::is_trivially_destructible_v) { + zero_value()->~Cell(); + } } Cell* zero_value() { return std::launder(reinterpret_cast(&zero_value_storage)); } @@ -125,8 +129,9 @@ public: const auto& key = key_holder_get_key(key_holder); set_has_zero(key); inserted = true; - } else + } else { inserted = false; + } it = zero_value(); } @@ -219,7 +224,7 @@ protected: using T2 = typename SubMaps::T2; using T3 = typename SubMaps::T3; - // Long strings are stored as StringRef along with saved hash + // Long strings are stored as doris::StringRef along with saved hash using Ts = typename SubMaps::Ts; using Self = StringHashTable; @@ -250,7 +255,7 @@ protected: friend class StringHashTable; public: - iterator_base() {} + iterator_base() = default; iterator_base(Container* container_, bool end = false) : container(container_) { if (end) { sub_table_index = 4; @@ -373,8 +378,6 @@ protected: } } } - while (need_switch_to_next) - ; return static_cast(*this); } @@ -449,7 +452,7 @@ protected: }; public: - using Key = StringRef; + using Key = doris::StringRef; using key_type = Key; using mapped_type = typename Ts::mapped_type; using value_type = typename Ts::value_type; @@ -487,7 +490,7 @@ public: template static auto ALWAYS_INLINE dispatch(Self& self, KeyHolder&& key_holder, Func&& func) { StringHashTableHash hash; - const StringRef& x = key_holder_get_key(key_holder); + const doris::StringRef& x = key_holder_get_key(key_holder); const size_t sz = x.size; if (sz == 0) { key_holder_discard_key(key_holder); diff --git a/be/src/vec/common/string_ref.cpp b/be/src/vec/common/string_ref.cpp new file mode 100644 index 0000000000..20cfbad708 --- /dev/null +++ b/be/src/vec/common/string_ref.cpp @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/StringRef. +// And modified by Doris + +#include "string_ref.h" + +namespace doris { + +StringRef StringRef::trim() const { + // Remove leading and trailing spaces. + int32_t begin = 0; + + while (begin < size && data[begin] == ' ') { + ++begin; + } + + int32_t end = size - 1; + + while (end > begin && data[end] == ' ') { + --end; + } + + return StringRef(data + begin, end - begin + 1); +} + +// TODO: rewrite in AVX2 +size_t StringRef::find_first_of(char c) const { + const char* p = static_cast(memchr(data, c, size)); + return p == nullptr ? -1 : p - data; +} + +StringRef StringRef::min_string_val() { + return StringRef((char*)(&StringRef::MIN_CHAR), 0); +} + +StringRef StringRef::max_string_val() { + return StringRef((char*)(&StringRef::MAX_CHAR), 1); +} + +bool StringRef::start_with(const StringRef& search_string) const { + DCHECK(size >= search_string.size); + if (search_string.size == 0) { + return true; + } + +#if defined(__SSE2__) || defined(__aarch64__) + return memequalSSE2Wide(data, search_string.data, search_string.size); +#else + return 0 == memcmp(data, search_string.data, search_string.size); +#endif +} +bool StringRef::end_with(const StringRef& search_string) const { + DCHECK(size >= search_string.size); + if (search_string.size == 0) { + return true; + } + +#if defined(__SSE2__) || defined(__aarch64__) + return memequalSSE2Wide(data + size - search_string.size, search_string.data, + search_string.size); +#else + return 0 == memcmp(data + size - search_string.size, search_string.data, search_string.size); +#endif +} +} // namespace doris diff --git a/be/src/vec/common/string_ref.h b/be/src/vec/common/string_ref.h index c146b48187..3eef585a60 100644 --- a/be/src/vec/common/string_ref.h +++ b/be/src/vec/common/string_ref.h @@ -29,7 +29,10 @@ #include "gutil/hash/city.h" #include "gutil/hash/hash128to64.h" #include "udf/udf.h" +#include "util/cpu_info.h" +#include "util/hash_util.hpp" #include "util/slice.h" +#include "vec/common/string_ref.h" #include "vec/common/unaligned.h" #include "vec/core/types.h" @@ -40,12 +43,19 @@ #if defined(__SSE4_2__) #include #include + +#include "util/sse_util.hpp" #endif #if defined(__aarch64__) #include #endif +namespace doris { + +/// unnamed namespace packaging simd-style equality compare functions. +namespace { + #if defined(__SSE2__) || defined(__aarch64__) /** Compare strings for equality. @@ -112,19 +122,26 @@ inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) { p1 += 64; p2 += 64; size -= 64; - } else + } else { return false; + } } switch (size / 16) { case 3: - if (!compareSSE2(p1 + 32, p2 + 32)) return false; + if (!compareSSE2(p1 + 32, p2 + 32)) { + return false; + } [[fallthrough]]; case 2: - if (!compareSSE2(p1 + 16, p2 + 16)) return false; + if (!compareSSE2(p1 + 16, p2 + 16)) { + return false; + } [[fallthrough]]; case 1: - if (!compareSSE2(p1, p2)) return false; + if (!compareSSE2(p1, p2)) { + return false; + } } return compareSSE2(p1 + size - 16, p2 + size - 16); @@ -132,24 +149,85 @@ inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) { #endif +// Compare two strings using sse4.2 intrinsics if they are available. This code assumes +// that the trivial cases are already handled (i.e. one string is empty). +// Returns: +// < 0 if s1 < s2 +// 0 if s1 == s2 +// > 0 if s1 > s2 +// The SSE code path is just under 2x faster than the non-sse code path. +// - s1/n1: ptr/len for the first string +// - s2/n2: ptr/len for the second string +// - len: min(n1, n2) - this can be more cheaply passed in by the caller +inline int string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2, int64_t len) { + DCHECK_EQ(len, std::min(n1, n2)); +#ifdef __SSE4_2__ + while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) { + __m128i xmm0 = _mm_loadu_si128(reinterpret_cast(s1)); + __m128i xmm1 = _mm_loadu_si128(reinterpret_cast(s2)); + int chars_match = _mm_cmpestri(xmm0, sse_util::CHARS_PER_128_BIT_REGISTER, xmm1, + sse_util::CHARS_PER_128_BIT_REGISTER, sse_util::STRCMP_MODE); + if (chars_match != sse_util::CHARS_PER_128_BIT_REGISTER) { + return (unsigned char)s1[chars_match] - (unsigned char)s2[chars_match]; + } + len -= sse_util::CHARS_PER_128_BIT_REGISTER; + s1 += sse_util::CHARS_PER_128_BIT_REGISTER; + s2 += sse_util::CHARS_PER_128_BIT_REGISTER; + } +#endif + unsigned char u1, u2; + while (len-- > 0) { + u1 = (unsigned char)*s1++; + u2 = (unsigned char)*s2++; + if (u1 != u2) { + return u1 - u2; + } + if (u1 == '\0') { + return n1 - n2; + } + } + + return n1 - n2; +} + +} // unnamed namespace + /// The thing to avoid creating strings to find substrings in the hash table. +/// User should make sure data source is const. +/// maybe considering rewrite it with std::span / std::basic_string_view is meaningful. struct StringRef { + // TODO: opening member accessing really damages. const char* data = nullptr; size_t size = 0; + StringRef() = default; StringRef(const char* data_, size_t size_) : data(data_), size(size_) {} StringRef(const unsigned char* data_, size_t size_) - : data(reinterpret_cast(data_)), size(size_) {} + : StringRef(reinterpret_cast(data_), size_) {} + StringRef(const std::string& s) : data(s.data()), size(s.size()) {} - StringRef() = default; + StringRef(const StringVal& src) : StringRef(src.ptr, src.len) {} + explicit StringRef(const char* str) : data(str), size(strlen(str)) {} + + static StringRef from_string_val(const StringVal& src) { return StringRef(src); } std::string to_string() const { return std::string(data, size); } + std::string debug_string() const { return to_string(); } std::string_view to_string_view() const { return std::string_view(data, size); } - doris::Slice to_slice() const { return doris::Slice(data, size); } + Slice to_slice() const { return doris::Slice(data, size); } - // this is just for show, eg. print data to error log, to avoid print large string. + // this is just for show, e.g. print data to error log, to avoid print large string. std::string to_prefix(size_t length) const { return std::string(data, std::min(length, size)); } + // TODO: this function is dangerous! + StringVal to_string_val() const { + return StringVal(reinterpret_cast(const_cast(data)), size); + } + + void to_string_val(StringVal* sv) const { + *sv = StringVal(reinterpret_cast(const_cast(data)), size); + } + explicit operator std::string() const { return to_string(); } operator std::string_view() const { return std::string_view {data, size}; } @@ -157,66 +235,94 @@ struct StringRef { return StringRef(data + start_pos, (new_len < 0) ? (size - start_pos) : new_len); } - StringVal to_string_val() { - return StringVal(reinterpret_cast(const_cast(data)), size); + StringRef substring(int start_pos) const { return substring(start_pos, size - start_pos); } + + // Trims leading and trailing spaces. + StringRef trim() const; + + // support for type_limit + static constexpr char MIN_CHAR = 0x00; + static constexpr char MAX_CHAR = 0xFF; + static StringRef min_string_val(); + static StringRef max_string_val(); + + bool start_with(const StringRef& search_string) const; + bool end_with(const StringRef& search_string) const; + + // Byte-by-byte comparison. Returns: + // this < other: -1 + // this == other: 0 + // this > other: 1 + int compare(const StringRef& other) const { + int l = std::min(size, other.size); + + if (l == 0) { + if (size == other.size) { + return 0; + } else if (size == 0) { + return -1; + } else { + DCHECK_EQ(other.size, 0); + return 1; + } + } + + return string_compare(this->data, this->size, other.data, other.size, l); } - static StringRef from_string_val(StringVal sv) { - return StringRef(reinterpret_cast(sv.ptr), sv.len); + void replace(const char* ptr, int len) { + this->data = ptr; + this->size = len; } - bool start_with(StringRef& search_string) const { - DCHECK(size >= search_string.size); - if (search_string.size == 0) return true; + // Find the first position char of appear, return -1 if not found + size_t find_first_of(char c) const; -#if defined(__SSE2__) || defined(__aarch64__) - return memequalSSE2Wide(data, search_string.data, search_string.size); -#else - return 0 == memcmp(data, search_string.data, search_string.size); + // == + bool eq(const StringRef& other) const { + if (this->size != other.size) { + return false; + } +#if defined(__SSE2__) + return memequalSSE2Wide(this->data, other.data, this->size); #endif + return string_compare(this->data, this->size, other.data, other.size, this->size) == 0; } - bool end_with(StringRef& search_string) const { - DCHECK(size >= search_string.size); - if (search_string.size == 0) return true; -#if defined(__SSE2__) || defined(__aarch64__) - return memequalSSE2Wide(data + size - search_string.size, search_string.data, - search_string.size); -#else - return 0 == - memcmp(data + size - search_string.size, search_string.data, search_string.size); -#endif - } -}; + bool operator==(const StringRef& other) const { return eq(other); } + // != + bool ne(const StringRef& other) const { return !eq(other); } + // <= + bool le(const StringRef& other) const { return compare(other) <= 0; } + // >= + bool ge(const StringRef& other) const { return compare(other) >= 0; } + // < + bool lt(const StringRef& other) const { return compare(other) < 0; } + // > + bool gt(const StringRef& other) const { return compare(other) > 0; } + + bool operator!=(const StringRef& other) const { return ne(other); } + + bool operator<=(const StringRef& other) const { return le(other); } + + bool operator>=(const StringRef& other) const { return ge(other); } + + bool operator<(const StringRef& other) const { return lt(other); } + + bool operator>(const StringRef& other) const { return gt(other); } + + struct Comparator { + bool operator()(const StringRef& a, const StringRef& b) const { return a.compare(b) < 0; } + }; +}; // class StringRef + +// This function must be called 'hash_value' to be picked up by boost. +inline std::size_t hash_value(const StringRef& v) { + return HashUtil::hash(v.data, v.size, 0); +} using StringRefs = std::vector; -inline bool operator==(StringRef lhs, StringRef rhs) { - if (lhs.size != rhs.size) return false; - - if (lhs.size == 0) return true; - -#if defined(__SSE2__) || defined(__aarch64__) - return memequalSSE2Wide(lhs.data, rhs.data, lhs.size); -#else - return 0 == memcmp(lhs.data, rhs.data, lhs.size); -#endif -} - -inline bool operator!=(StringRef lhs, StringRef rhs) { - return !(lhs == rhs); -} - -inline bool operator<(StringRef lhs, StringRef rhs) { - int cmp = memcmp(lhs.data, rhs.data, std::min(lhs.size, rhs.size)); - return cmp < 0 || (cmp == 0 && lhs.size < rhs.size); -} - -inline bool operator>(StringRef lhs, StringRef rhs) { - int cmp = memcmp(lhs.data, rhs.data, std::min(lhs.size, rhs.size)); - return cmp > 0 || (cmp == 0 && lhs.size > rhs.size); -} - /** Hash functions. * You can use either CityHash64, * or a function based on the crc32 statement, @@ -278,11 +384,13 @@ inline size_t hash_less_than16(const char* data, size_t size) { } struct CRC32Hash { - size_t operator()(StringRef x) const { + size_t operator()(const StringRef& x) const { const char* pos = x.data; size_t size = x.size; - if (size == 0) return 0; + if (size == 0) { + return 0; + } if (size < 8) { return hash_less_than8(x.data, x.size); @@ -318,22 +426,21 @@ struct CRC32Hash { struct StringRefHash : StringRefHash64 {}; -#endif +#endif // end of hash functions -template <> -struct std::hash : public StringRefHash {}; +inline std::ostream& operator<<(std::ostream& os, const StringRef& str) { + return os << str.to_string(); +} +} // namespace doris namespace ZeroTraits { -inline bool check(const StringRef& x) { +inline bool check(const doris::StringRef& x) { return 0 == x.size; } -inline void set(StringRef& x) { +inline void set(doris::StringRef& x) { x.size = 0; } } // namespace ZeroTraits -inline std::ostream& operator<<(std::ostream& os, const StringRef& str) { - if (str.data) os.write(str.data, str.size); - - return os; -} +template <> +struct std::hash : public doris::StringRefHash {}; diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index a23ecb2a75..4202f76304 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -855,7 +855,7 @@ void Block::deep_copy_slot(void* dst, MemPool* pool, const doris::TypeDescriptor auto item_offset = offset + i; const auto& data_ref = item_type_desc.type != TYPE_ARRAY ? item_column->get_data_at(item_offset) - : StringRef(); + : StringRef {}; if (item_type_desc.is_date_type()) { // In CollectionValue, date type data is stored as either uint24_t or uint64_t. DateTimeValue datetime_value; @@ -890,33 +890,37 @@ void Block::deep_copy_slot(void* dst, MemPool* pool, const doris::TypeDescriptor auto size = bitmap_value->getSizeInBytes(); // serialize the content of string - auto string_slot = reinterpret_cast(dst); - string_slot->ptr = reinterpret_cast(pool->allocate(size)); - bitmap_value->write(string_slot->ptr); - string_slot->len = size; + // TODO: NEED TO REWRITE COMPLETELY. the way writing now is WRONG. + // StringRef shouldn't managing exclusive memory cause it will break RAII. + // besides, accessing object which is essentially const by non-const object + // is UB! + auto string_slot = reinterpret_cast(dst); + string_slot->data = reinterpret_cast(pool->allocate(size)); + bitmap_value->write(const_cast(string_slot->data)); //! + string_slot->size = size; } else if (type_desc.type == TYPE_HLL) { auto hll_value = (HyperLogLog*)(data_ref.data); auto size = hll_value->max_serialized_size(); - auto string_slot = reinterpret_cast(dst); - string_slot->ptr = reinterpret_cast(pool->allocate(size)); - size_t actual_size = hll_value->serialize((uint8_t*)string_slot->ptr); - string_slot->len = actual_size; + auto string_slot = reinterpret_cast(dst); + string_slot->data = reinterpret_cast(pool->allocate(size)); + size_t actual_size = hll_value->serialize((uint8_t*)string_slot->data); + string_slot->size = actual_size; } else if (type_desc.is_string_type()) { // TYPE_OBJECT and TYPE_HLL must be handled before. memcpy(dst, (const void*)(&data_ref), sizeof(data_ref)); // Copy the content of string if (padding_char && type_desc.type == TYPE_CHAR) { // serialize the content of string - auto string_slot = reinterpret_cast(dst); - string_slot->ptr = reinterpret_cast(pool->allocate(type_desc.len)); - string_slot->len = type_desc.len; - memset(string_slot->ptr, 0, type_desc.len); - memcpy(string_slot->ptr, data_ref.data, data_ref.size); + auto string_slot = reinterpret_cast(dst); + string_slot->data = reinterpret_cast(pool->allocate(type_desc.len)); + string_slot->size = type_desc.len; + memset(const_cast(string_slot->data), 0, type_desc.len); //! + memcpy(const_cast(string_slot->data), data_ref.data, data_ref.size); //! } else { auto str_ptr = pool->allocate(data_ref.size); memcpy(str_ptr, data_ref.data, data_ref.size); - auto string_slot = reinterpret_cast(dst); - string_slot->ptr = reinterpret_cast(str_ptr); - string_slot->len = data_ref.size; + auto string_slot = reinterpret_cast(dst); + string_slot->data = reinterpret_cast(str_ptr); + string_slot->size = data_ref.size; } } else { memcpy(dst, data_ref.data, data_ref.size); diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h index 332d310d57..7f0fefc0ad 100644 --- a/be/src/vec/core/types.h +++ b/be/src/vec/core/types.h @@ -144,8 +144,8 @@ struct TypeName { static const char* get() { return "uint24_t"; } }; template <> -struct TypeName { - static const char* get() { return "StringValue"; } +struct TypeName { + static const char* get() { return "StringRef"; } }; template <> diff --git a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp index d8107ba925..e00808fb63 100644 --- a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp +++ b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp @@ -22,9 +22,9 @@ #include "exec/exec_node.h" #include "gen_cpp/PlanNodes_types.h" #include "runtime/runtime_state.h" -#include "runtime/string_value.h" #include "runtime/tuple_row.h" #include "util/runtime_profile.h" +#include "vec/common/string_ref.h" namespace doris::vectorized { diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 0c27d78add..339c150422 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -308,8 +308,8 @@ static std::tuple convert_to_orc_literal(const orc::Type* ty case orc::TypeKind::BINARY: case orc::TypeKind::CHAR: case orc::TypeKind::VARCHAR: { - StringValue* string_value = (StringValue*)value; - return std::make_tuple(true, orc::Literal(string_value->ptr, string_value->len)); + StringRef* string_value = (StringRef*)value; + return std::make_tuple(true, orc::Literal(string_value->data, string_value->size)); } case orc::TypeKind::DECIMAL: { int128_t decimal_value; diff --git a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h index 06db698a9c..49016b5266 100644 --- a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h +++ b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h @@ -128,7 +128,7 @@ static bool _eval_in_val(const ColumnMinMaxParams& params) { case TYPE_CHAR: { std::vector in_values; for (auto val : params.in_pred_values) { - in_values.emplace_back(((StringValue*)val)->to_string()); + in_values.emplace_back(((StringRef*)val)->to_string()); } if (in_values.empty()) { return false; @@ -216,7 +216,7 @@ static bool _eval_eq(const ColumnMinMaxParams& params) { case TYPE_STRING: case TYPE_VARCHAR: case TYPE_CHAR: { - std::string conjunct_value = ((StringValue*)params.value)->to_string(); + std::string conjunct_value = ((StringRef*)params.value)->to_string(); if (strcmp(conjunct_value.data(), params.min_bytes) < 0 || strcmp(conjunct_value.data(), params.max_bytes) > 0) { return true; @@ -292,7 +292,7 @@ static bool _eval_gt(const ColumnMinMaxParams& params, bool is_eq) { case TYPE_STRING: case TYPE_VARCHAR: case TYPE_CHAR: { - std::string conjunct_value = ((StringValue*)params.value)->to_string(); + std::string conjunct_value = ((StringRef*)params.value)->to_string(); if (!is_eq && strcmp(params.max_bytes, conjunct_value.data()) <= 0) { return true; } else if (strcmp(params.max_bytes, conjunct_value.data()) < 0) { @@ -343,7 +343,7 @@ static bool _eval_lt(const ColumnMinMaxParams& params, bool is_eq) { case TYPE_STRING: case TYPE_VARCHAR: case TYPE_CHAR: { - std::string conjunct_value = ((StringValue*)params.value)->to_string(); + std::string conjunct_value = ((StringRef*)params.value)->to_string(); if (!is_eq && strcmp(params.min_bytes, conjunct_value.data()) >= 0) { return true; } else if (strcmp(params.min_bytes, conjunct_value.data()) > 0) { diff --git a/be/src/vec/exec/format/table/iceberg_reader.cpp b/be/src/vec/exec/format/table/iceberg_reader.cpp index 4848b0b1c6..5c9f1c52cb 100644 --- a/be/src/vec/exec/format/table/iceberg_reader.cpp +++ b/be/src/vec/exec/format/table/iceberg_reader.cpp @@ -302,7 +302,7 @@ Status IcebergTableReader::_position_delete( parquet_reader->set_delete_rows(&_delete_rows); COUNTER_UPDATE(_iceberg_profile.num_delete_rows, num_delete_rows); } - // the delete rows are copy out, we can erase them. + // the deleted rows are copy out, we can erase them. for (auto& erase_item : erase_data) { erase_item->erase(data_file_path); } diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index 21a38c6595..906f7a18d7 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -768,7 +768,7 @@ Status VScanNode::_normalize_in_and_eq_predicate(VExpr* expr, VExprContext* expr if (value.data != nullptr) { if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || T == TYPE_HLL) { - auto val = StringValue(value.data, value.size); + auto val = StringRef(value.data, value.size); RETURN_IF_ERROR(_change_value_range( temp_range, reinterpret_cast(&val), ColumnValueRange::add_fixed_value_range, fn_name)); @@ -845,7 +845,7 @@ Status VScanNode::_normalize_not_in_and_not_eq_predicate(VExpr* expr, VExprConte auto fn_name = std::string(""); if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || T == TYPE_HLL) { - auto val = StringValue(value.data, value.size); + auto val = StringRef(value.data, value.size); if (is_fixed_range) { RETURN_IF_ERROR(_change_value_range( range, reinterpret_cast(&val), @@ -934,7 +934,7 @@ Status VScanNode::_normalize_noneq_binary_predicate(VExpr* expr, VExprContext* e if (value.data != nullptr) { if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || T == TYPE_HLL) { - auto val = StringValue(value.data, value.size); + auto val = StringRef(value.data, value.size); RETURN_IF_ERROR(_change_value_range(range, reinterpret_cast(&val), ColumnValueRange::add_value_range, fn_name, slot_ref_child)); @@ -1047,7 +1047,7 @@ Status VScanNode::_normalize_binary_in_compound_predicate(vectorized::VExpr* exp if (value.data != nullptr) { if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || T == TYPE_HLL) { - auto val = StringValue(value.data, value.size); + auto val = StringRef(value.data, value.size); RETURN_IF_ERROR(_change_value_range( range, reinterpret_cast(&val), ColumnValueRange::add_compound_value_range, fn_name, @@ -1104,7 +1104,7 @@ Status VScanNode::_normalize_match_predicate(VExpr* expr, VExprContext* expr_ctx using CppType = typename PrimitiveTypeTraits::CppType; if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || T == TYPE_HLL) { - auto val = StringValue(value.data, value.size); + auto val = StringRef(value.data, value.size); ColumnValueRange::add_match_value_range(temp_range, to_match_type(expr->op()), reinterpret_cast(&val)); diff --git a/be/src/vec/exec/vbroker_scan_node.cpp b/be/src/vec/exec/vbroker_scan_node.cpp index 1b46e36022..d0ebe5c6cb 100644 --- a/be/src/vec/exec/vbroker_scan_node.cpp +++ b/be/src/vec/exec/vbroker_scan_node.cpp @@ -20,12 +20,12 @@ #include "gen_cpp/PlanNodes_types.h" #include "runtime/memory/mem_tracker.h" #include "runtime/runtime_state.h" -#include "runtime/string_value.h" #include "runtime/tuple.h" #include "runtime/tuple_row.h" #include "util/runtime_profile.h" #include "util/thread.h" #include "util/types.h" +#include "vec/common/string_ref.h" #include "vec/exec/vbroker_scanner.h" #include "vec/exec/vjson_scanner.h" #include "vec/exec/vorc_scanner.h" diff --git a/be/src/vec/exec/vbroker_scanner.cpp b/be/src/vec/exec/vbroker_scanner.cpp index 528218f548..c39b2003fb 100644 --- a/be/src/vec/exec/vbroker_scanner.cpp +++ b/be/src/vec/exec/vbroker_scanner.cpp @@ -232,9 +232,9 @@ Status VBrokerScanner::_line_to_src_tuple(const Slice& line) { } _src_tuple->set_not_null(slot_desc->null_indicator_offset()); void* slot = _src_tuple->get_slot(slot_desc->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - str_slot->ptr = value.data; - str_slot->len = value.size; + Slice* str_slot = reinterpret_cast(slot); + str_slot->data = value.data; + str_slot->size = value.size; } const TBrokerRangeDesc& range = _ranges.at(_next_range - 1); diff --git a/be/src/vec/exec/vdata_gen_scan_node.cpp b/be/src/vec/exec/vdata_gen_scan_node.cpp index e91626d49c..f30a2a14f5 100644 --- a/be/src/vec/exec/vdata_gen_scan_node.cpp +++ b/be/src/vec/exec/vdata_gen_scan_node.cpp @@ -22,7 +22,6 @@ #include "common/status.h" #include "gen_cpp/PlanNodes_types.h" #include "runtime/runtime_state.h" -#include "runtime/string_value.h" #include "runtime/tuple_row.h" #include "util/runtime_profile.h" #include "vec/exec/data_gen_functions/vnumbers_tvf.h" diff --git a/be/src/vec/exec/vjson_scanner.cpp b/be/src/vec/exec/vjson_scanner.cpp index dee107f06a..7585af4d98 100644 --- a/be/src/vec/exec/vjson_scanner.cpp +++ b/be/src/vec/exec/vjson_scanner.cpp @@ -1456,14 +1456,18 @@ std::string VJsonReader::_print_json_value(const rapidjson::Value& value) { return std::string(buffer.GetString()); } +// TODO: NEED TO REWRITE COMPLETELY. the way writing now is WRONG. +// StringRef shouldn't managing exclusive memory cause it will break RAII. +// besides, accessing object which is essentially const by non-const object +// is UB! void VJsonReader::_fill_slot(doris::Tuple* tuple, SlotDescriptor* slot_desc, MemPool* mem_pool, const uint8_t* value, int32_t len) { tuple->set_not_null(slot_desc->null_indicator_offset()); void* slot = tuple->get_slot(slot_desc->tuple_offset()); - StringValue* str_slot = reinterpret_cast(slot); - str_slot->ptr = reinterpret_cast(mem_pool->allocate(len)); - memcpy(str_slot->ptr, value, len); - str_slot->len = len; + StringRef* str_slot = reinterpret_cast(slot); + str_slot->data = reinterpret_cast(mem_pool->allocate(len)); + memcpy(const_cast(str_slot->data), value, len); + str_slot->size = len; } Status VJsonReader::_write_data_to_tuple(rapidjson::Value::ConstValueIterator value, diff --git a/be/src/vec/exec/vmysql_scan_node.cpp b/be/src/vec/exec/vmysql_scan_node.cpp index dcc7593ee9..a7a77f29db 100644 --- a/be/src/vec/exec/vmysql_scan_node.cpp +++ b/be/src/vec/exec/vmysql_scan_node.cpp @@ -21,10 +21,10 @@ #include "exec/text_converter.hpp" #include "gen_cpp/PlanNodes_types.h" #include "runtime/runtime_state.h" -#include "runtime/string_value.h" #include "runtime/tuple_row.h" #include "util/runtime_profile.h" #include "util/types.h" +#include "vec/common/string_ref.h" namespace doris::vectorized { VMysqlScanNode::VMysqlScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) diff --git a/be/src/vec/exec/vschema_scan_node.cpp b/be/src/vec/exec/vschema_scan_node.cpp index 65588eb11b..f99de7ff85 100644 --- a/be/src/vec/exec/vschema_scan_node.cpp +++ b/be/src/vec/exec/vschema_scan_node.cpp @@ -21,10 +21,10 @@ #include "exec/text_converter.hpp" #include "gen_cpp/PlanNodes_types.h" #include "runtime/runtime_state.h" -#include "runtime/string_value.h" #include "runtime/tuple_row.h" #include "util/runtime_profile.h" #include "util/types.h" +#include "vec/common/string_ref.h" #include "vec/core/types.h" namespace doris::vectorized { @@ -348,9 +348,9 @@ Status VSchemaScanNode::write_slot_to_vectorized_column(void* slot, SlotDescript case TYPE_VARCHAR: case TYPE_CHAR: case TYPE_STRING: { - StringValue* str_slot = reinterpret_cast(slot); - reinterpret_cast(col_ptr)->insert_data(str_slot->ptr, - str_slot->len); + StringRef* str_slot = reinterpret_cast(slot); + reinterpret_cast(col_ptr)->insert_data(str_slot->data, + str_slot->size); break; } diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.h b/be/src/vec/exprs/table_function/vexplode_json_array.h index 00531b9a22..ab171c7cef 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.h +++ b/be/src/vec/exprs/table_function/vexplode_json_array.h @@ -20,8 +20,8 @@ #include #include -#include "runtime/string_value.h" #include "vec/columns/column.h" +#include "vec/common/string_ref.h" #include "vec/exprs/table_function/table_function.h" namespace doris::vectorized { @@ -35,7 +35,7 @@ struct ParsedData { // The number parsed from json array // the `_backup` saved the real number entity. std::vector _data; - std::vector _data_string; + std::vector _data_string; std::vector _backup_int; std::vector _backup_double; std::vector _backup_string; diff --git a/be/src/vec/exprs/table_function/vexplode_split.h b/be/src/vec/exprs/table_function/vexplode_split.h index 63396bff91..53935b6a0b 100644 --- a/be/src/vec/exprs/table_function/vexplode_split.h +++ b/be/src/vec/exprs/table_function/vexplode_split.h @@ -18,8 +18,8 @@ #pragma once #include "gutil/strings/stringpiece.h" -#include "runtime/string_value.h" #include "vec/columns/column.h" +#include "vec/common/string_ref.h" #include "vec/exprs/table_function/table_function.h" namespace doris::vectorized { diff --git a/be/src/vec/exprs/vbloom_predicate.cpp b/be/src/vec/exprs/vbloom_predicate.cpp index 5cdaa048ed..f154ab7856 100644 --- a/be/src/vec/exprs/vbloom_predicate.cpp +++ b/be/src/vec/exprs/vbloom_predicate.cpp @@ -68,7 +68,7 @@ Status VBloomPredicate::execute(VExprContext* context, Block* block, int* result if (type.is_string_or_fixed_string()) { for (size_t i = 0; i < sz; i++) { auto ele = argument_column->get_data_at(i); - const StringValue v(ele.data, ele.size); + const StringRef v(ele.data, ele.size); ptr[i] = _filter->find(reinterpret_cast(&v)); } } else if (_be_exec_version > 0 && (type.is_int_or_uint() || type.is_float())) { diff --git a/be/src/vec/exprs/vdirect_in_predicate.h b/be/src/vec/exprs/vdirect_in_predicate.h index 458e6c0a0e..b717f00d18 100644 --- a/be/src/vec/exprs/vdirect_in_predicate.h +++ b/be/src/vec/exprs/vdirect_in_predicate.h @@ -48,7 +48,7 @@ public: if (type.is_string_or_fixed_string()) { for (size_t i = 0; i < sz; i++) { auto ele = argument_column->get_data_at(i); - StringValue v(ele.data, ele.size); + StringRef v(ele.data, ele.size); ptr[i] = _filter->find(reinterpret_cast(&v)); } } else if (type.is_int_or_uint() || type.is_float()) { diff --git a/be/src/vec/functions/function_regexp.cpp b/be/src/vec/functions/function_regexp.cpp index 0fc3681a1e..bd651e85c1 100644 --- a/be/src/vec/functions/function_regexp.cpp +++ b/be/src/vec/functions/function_regexp.cpp @@ -20,8 +20,8 @@ #include #include "exprs/string_functions.h" -#include "runtime/string_value.h" #include "udf/udf.h" +#include "vec/common/string_ref.h" #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" #include "vec/functions/function_string.h" diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index ad8499dcc7..660dce0551 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -93,7 +93,7 @@ struct StringUtf8LengthImpl { for (int i = 0; i < size; ++i) { const char* raw_str = reinterpret_cast(&data[offsets[i - 1]]); int str_size = offsets[i] - offsets[i - 1]; - res[i] = get_char_len(StringValue(raw_str, str_size), str_size); + res[i] = get_char_len(StringRef(raw_str, str_size), str_size); } return Status::OK(); } @@ -186,8 +186,8 @@ struct InStrOP { return; } - StringValue str_sv(strl.data(), strl.length()); - StringValue substr_sv(strr.data(), strr.length()); + StringRef str_sv(strl.data(), strl.length()); + StringRef substr_sv(strr.data(), strr.length()); StringSearch search(&substr_sv); // Hive returns positions starting from 1. int loc = search.search(&str_sv); diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 0339bbb22f..06af3c6c19 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -66,6 +66,7 @@ namespace doris::vectorized { +//TODO: these three functions could be merged. inline size_t get_char_len(const std::string_view& str, std::vector* str_index) { size_t char_len = 0; for (size_t i = 0, char_size = 0; i < str.length(); i += char_size) { @@ -86,10 +87,10 @@ inline size_t get_char_len(const StringVal& str, std::vector* str_index) return char_len; } -inline size_t get_char_len(const StringValue& str, size_t end_pos) { +inline size_t get_char_len(const StringRef& str, size_t end_pos) { size_t char_len = 0; - for (size_t i = 0, char_size = 0; i < std::min(str.len, end_pos); i += char_size) { - char_size = UTF8_BYTE_LENGTH[(unsigned char)(str.ptr)[i]]; + for (size_t i = 0, char_size = 0; i < std::min(str.size, end_pos); i += char_size) { + char_size = UTF8_BYTE_LENGTH[(unsigned char)(str.data)[i]]; ++char_len; } return char_len; @@ -1809,7 +1810,7 @@ public: auto param = parameter_col->get_data_at(i); auto res = extract_url(source, param); - col_res->insert_data(res.ptr, res.len); + col_res->insert_data(res.data, res.size); } block.replace_by_position(result, std::move(col_res)); @@ -1817,11 +1818,11 @@ public: } private: - StringValue extract_url(StringRef url, StringRef parameter) { + StringRef extract_url(StringRef url, StringRef parameter) { if (url.size == 0 || parameter.size == 0) { - return StringValue("", 0); + return StringRef("", 0); } - return UrlParser::extract_url(StringValue(url), StringValue(parameter)); + return UrlParser::extract_url(url, parameter); } }; @@ -1875,18 +1876,18 @@ public: } auto part = part_col->get_data_at(i); - StringValue p(const_cast(part.data), part.size); + StringRef p(const_cast(part.data), part.size); UrlParser::UrlPart url_part = UrlParser::get_url_part(p); - StringValue url_key; + StringRef url_key; if (has_key) { auto key = key_col->get_data_at(i); - url_key = StringValue(const_cast(key.data), key.size); + url_key = StringRef(const_cast(key.data), key.size); } auto source = url_col->get_data_at(i); - StringValue url_val(const_cast(source.data), source.size); + StringRef url_val(const_cast(source.data), source.size); - StringValue parse_res; + StringRef parse_res; bool success = false; if (has_key) { success = UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res); @@ -1908,7 +1909,7 @@ public: } } - StringOP::push_value_string(std::string_view(parse_res.ptr, parse_res.len), i, + StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, res_chars, res_offsets); } block.get_by_position(result).column = @@ -2136,11 +2137,11 @@ private: if (start_pos <= 0 || start_pos > str.len || start_pos > char_len) { return 0; } - StringValue substr_sv = StringValue::from_string_val(substr); + StringRef substr_sv = StringRef(substr); StringSearch search(&substr_sv); // Input start_pos starts from 1. - StringValue adjusted_str(reinterpret_cast(str.ptr) + index[start_pos - 1], - str.len - index[start_pos - 1]); + StringRef adjusted_str(reinterpret_cast(str.ptr) + index[start_pos - 1], + str.len - index[start_pos - 1]); int32_t match_pos = search.search(&adjusted_str); if (match_pos >= 0) { // Hive returns the position in the original string starting from 1. diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp index 4e96389b81..e8b0ad6321 100644 --- a/be/src/vec/functions/like.cpp +++ b/be/src/vec/functions/like.cpp @@ -17,9 +17,8 @@ #include "vec/functions/like.h" -#include "runtime/string_value.h" -#include "runtime/string_value.hpp" #include "vec/columns/columns_number.h" +#include "vec/common/string_ref.h" #include "vec/functions/simple_function_factory.h" namespace doris::vectorized { @@ -64,7 +63,7 @@ Status LikeSearchState::clone(LikeSearchState& cloned) { } Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, + const StringRef& pattern, ColumnUInt8::Container& result) { auto sz = val.size(); for (size_t i = 0; i < sz; i++) { @@ -76,7 +75,7 @@ Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const C } Status FunctionLikeBase::constant_ends_with_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, + const StringRef& pattern, ColumnUInt8::Container& result) { auto sz = val.size(); for (size_t i = 0; i < sz; i++) { @@ -88,7 +87,7 @@ Status FunctionLikeBase::constant_ends_with_fn(LikeSearchState* state, const Col } Status FunctionLikeBase::constant_equals_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, + const StringRef& pattern, ColumnUInt8::Container& result) { auto sz = val.size(); for (size_t i = 0; i < sz; i++) { @@ -98,7 +97,7 @@ Status FunctionLikeBase::constant_equals_fn(LikeSearchState* state, const Column } Status FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, + const StringRef& pattern, ColumnUInt8::Container& result) { auto sz = val.size(); for (size_t i = 0; i < sz; i++) { @@ -112,7 +111,7 @@ Status FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const Col Status FunctionLikeBase::constant_starts_with_fn_predicate( LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) { + const StringRef& pattern, ColumnUInt8::Container& result, const uint16_t* sel, size_t sz) { auto data_ptr = reinterpret_cast(val.get_data().data()); for (size_t i = 0; i < sz; i++) { result[i] = (data_ptr[sel[i]].size >= state->search_string_sv.size) && @@ -124,7 +123,7 @@ Status FunctionLikeBase::constant_starts_with_fn_predicate( Status FunctionLikeBase::constant_ends_with_fn_predicate( LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) { + const StringRef& pattern, ColumnUInt8::Container& result, const uint16_t* sel, size_t sz) { auto data_ptr = reinterpret_cast(val.get_data().data()); for (size_t i = 0; i < sz; i++) { result[i] = @@ -138,9 +137,9 @@ Status FunctionLikeBase::constant_ends_with_fn_predicate( Status FunctionLikeBase::constant_equals_fn_predicate(LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, - ColumnUInt8::Container& result, uint16_t* sel, - size_t sz) { + const StringRef& pattern, + ColumnUInt8::Container& result, + const uint16_t* sel, size_t sz) { auto data_ptr = reinterpret_cast(val.get_data().data()); for (size_t i = 0; i < sz; i++) { result[i] = (data_ptr[sel[i]] == state->search_string_sv); @@ -150,7 +149,7 @@ Status FunctionLikeBase::constant_equals_fn_predicate(LikeSearchState* state, Status FunctionLikeBase::constant_substring_fn_predicate( LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) { + const StringRef& pattern, ColumnUInt8::Container& result, const uint16_t* sel, size_t sz) { auto data_ptr = reinterpret_cast(val.get_data().data()); for (size_t i = 0; i < sz; i++) { if (state->search_string_sv.size == 0) { @@ -163,7 +162,7 @@ Status FunctionLikeBase::constant_substring_fn_predicate( Status FunctionLikeBase::constant_starts_with_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringValue& pattern, + const StringRef& pattern, unsigned char* result) { *result = (val.size >= state->search_string_sv.size) && (state->search_string_sv == val.substring(0, state->search_string_sv.size)); @@ -171,7 +170,7 @@ Status FunctionLikeBase::constant_starts_with_fn_scalar(LikeSearchState* state, } Status FunctionLikeBase::constant_ends_with_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringValue& pattern, + const StringRef& pattern, unsigned char* result) { *result = (val.size >= state->search_string_sv.size) && (state->search_string_sv == val.substring(val.size - state->search_string_sv.size, @@ -180,14 +179,14 @@ Status FunctionLikeBase::constant_ends_with_fn_scalar(LikeSearchState* state, co } Status FunctionLikeBase::constant_equals_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringValue& pattern, + const StringRef& pattern, unsigned char* result) { *result = (val == state->search_string_sv); return Status::OK(); } Status FunctionLikeBase::constant_substring_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringValue& pattern, + const StringRef& pattern, unsigned char* result) { if (state->search_string_sv.size == 0) { *result = true; @@ -198,10 +197,9 @@ Status FunctionLikeBase::constant_substring_fn_scalar(LikeSearchState* state, co } Status FunctionLikeBase::constant_regex_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringValue& pattern, - unsigned char* result) { + const StringRef& pattern, unsigned char* result) { auto ret = hs_scan(state->hs_database.get(), val.data, val.size, 0, state->hs_scratch.get(), - state->hs_match_handler, (void*)result); + doris::vectorized::LikeSearchState::hs_match_handler, (void*)result); if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) { return Status::RuntimeError(fmt::format("hyperscan error: {}", ret)); } @@ -210,15 +208,15 @@ Status FunctionLikeBase::constant_regex_fn_scalar(LikeSearchState* state, const } Status FunctionLikeBase::regexp_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringValue& pattern, unsigned char* result) { - std::string_view re_pattern(pattern.ptr, pattern.len); + const StringRef& pattern, unsigned char* result) { + std::string_view re_pattern(pattern.data, pattern.size); hs_database_t* database = nullptr; hs_scratch_t* scratch = nullptr; RETURN_IF_ERROR(hs_prepare(nullptr, re_pattern.data(), &database, &scratch)); - auto ret = hs_scan(database, val.data, val.size, 0, scratch, state->hs_match_handler, - (void*)result); + auto ret = hs_scan(database, val.data, val.size, 0, scratch, + doris::vectorized::LikeSearchState::hs_match_handler, (void*)result); if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) { return Status::RuntimeError(fmt::format("hyperscan error: {}", ret)); } @@ -230,14 +228,14 @@ Status FunctionLikeBase::regexp_fn_scalar(LikeSearchState* state, const StringRe } Status FunctionLikeBase::constant_regex_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, + const StringRef& pattern, ColumnUInt8::Container& result) { auto sz = val.size(); for (size_t i = 0; i < sz; i++) { const auto& str_ref = val.get_data_at(i); - auto ret = hs_scan(state->hs_database.get(), str_ref.data, str_ref.size, 0, - state->hs_scratch.get(), state->hs_match_handler, - (void*)(result.data() + i)); + auto ret = hs_scan( + state->hs_database.get(), str_ref.data, str_ref.size, 0, state->hs_scratch.get(), + doris::vectorized::LikeSearchState::hs_match_handler, (void*)(result.data() + i)); if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) { return Status::RuntimeError(fmt::format("hyperscan error: {}", ret)); } @@ -247,8 +245,8 @@ Status FunctionLikeBase::constant_regex_fn(LikeSearchState* state, const ColumnS } Status FunctionLikeBase::regexp_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, ColumnUInt8::Container& result) { - std::string_view re_pattern(pattern.ptr, pattern.len); + const StringRef& pattern, ColumnUInt8::Container& result) { + std::string_view re_pattern(pattern.data, pattern.size); hs_database_t* database = nullptr; hs_scratch_t* scratch = nullptr; @@ -258,7 +256,8 @@ Status FunctionLikeBase::regexp_fn(LikeSearchState* state, const ColumnString& v for (size_t i = 0; i < sz; i++) { const auto& str_ref = val.get_data_at(i); auto ret = hs_scan(database, str_ref.data, str_ref.size, 0, scratch, - state->hs_match_handler, (void*)(result.data() + i)); + doris::vectorized::LikeSearchState::hs_match_handler, + (void*)(result.data() + i)); if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) { return Status::RuntimeError(fmt::format("hyperscan error: {}", ret)); } @@ -272,13 +271,14 @@ Status FunctionLikeBase::regexp_fn(LikeSearchState* state, const ColumnString& v Status FunctionLikeBase::constant_regex_fn_predicate(LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, - ColumnUInt8::Container& result, uint16_t* sel, - size_t sz) { + const StringRef& pattern, + ColumnUInt8::Container& result, + const uint16_t* sel, size_t sz) { auto data_ptr = reinterpret_cast(val.get_data().data()); for (size_t i = 0; i < sz; i++) { auto ret = hs_scan(state->hs_database.get(), data_ptr[sel[i]].data, data_ptr[sel[i]].size, - 0, state->hs_scratch.get(), state->hs_match_handler, + 0, state->hs_scratch.get(), + doris::vectorized::LikeSearchState::hs_match_handler, (void*)(result.data() + i)); if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) { return Status::RuntimeError(fmt::format("hyperscan error: {}", ret)); @@ -290,10 +290,10 @@ Status FunctionLikeBase::constant_regex_fn_predicate(LikeSearchState* state, Status FunctionLikeBase::regexp_fn_predicate(LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, - ColumnUInt8::Container& result, uint16_t* sel, + const StringRef& pattern, + ColumnUInt8::Container& result, const uint16_t* sel, size_t sz) { - std::string_view re_pattern(pattern.ptr, pattern.len); + std::string_view re_pattern(pattern.data, pattern.size); hs_database_t* database = nullptr; hs_scratch_t* scratch = nullptr; @@ -302,7 +302,8 @@ Status FunctionLikeBase::regexp_fn_predicate(LikeSearchState* state, auto data_ptr = reinterpret_cast(val.get_data().data()); for (size_t i = 0; i < sz; i++) { auto ret = hs_scan(database, data_ptr[sel[i]].data, data_ptr[sel[i]].size, 0, scratch, - state->hs_match_handler, (void*)(result.data() + i)); + doris::vectorized::LikeSearchState::hs_match_handler, + (void*)(result.data() + i)); if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) { return Status::RuntimeError(fmt::format("hyperscan error: {}", ret)); } @@ -318,11 +319,13 @@ Status FunctionLikeBase::regexp_fn_predicate(LikeSearchState* state, Status FunctionLikeBase::hs_prepare(FunctionContext* context, const char* expression, hs_database_t** database, hs_scratch_t** scratch) { hs_compile_error_t* compile_err; - auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY, HS_MODE_BLOCK, NULL, + auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY, HS_MODE_BLOCK, nullptr, database, &compile_err); if (res != HS_SUCCESS) { *database = nullptr; - if (context) context->set_error("hs_compile regex pattern error"); + if (context) { + context->set_error("hs_compile regex pattern error"); + } return Status::RuntimeError("hs_compile regex pattern error:" + std::string(compile_err->message)); hs_free_compile_error(compile_err); @@ -333,7 +336,9 @@ Status FunctionLikeBase::hs_prepare(FunctionContext* context, const char* expres hs_free_database(*database); *database = nullptr; *scratch = nullptr; - if (context) context->set_error("hs_alloc_scratch allocate scratch space error"); + if (context) { + context->set_error("hs_alloc_scratch allocate scratch space error"); + } return Status::RuntimeError("hs_alloc_scratch allocate scratch space error"); } @@ -359,9 +364,8 @@ Status FunctionLikeBase::execute_impl(FunctionContext* context, Block& block, context->get_function_state(FunctionContext::THREAD_LOCAL)); // for constant_substring_fn, use long run length search for performance if (constant_substring_fn == - *(state->function - .target())) { + *(state->function.target())) { RETURN_IF_ERROR(execute_substring(values->get_chars(), values->get_offsets(), vec_res, &state->search_state)); } else { @@ -412,11 +416,15 @@ Status FunctionLikeBase::execute_substring(const ColumnString::Chars& values, while (pos < end) { // search return matched substring start offset pos = (UInt8*)search_state->substring_pattern.search((char*)pos, end - pos); - if (pos >= end) break; + if (pos >= end) { + break; + } /// Determine which index it refers to. /// begin + value_offsets[i] is the start offset of string at i+1 - while (begin + value_offsets[i] < pos) ++i; + while (begin + value_offsets[i] < pos) { + ++i; + } /// We check that the entry does not pass through the boundaries of strings. if (pos + needle_size <= begin + value_offsets[i]) { @@ -435,36 +443,36 @@ Status FunctionLikeBase::vector_const(const ColumnString& values, const StringRe ColumnUInt8::Container& result, const LikeFn& function, LikeSearchState* search_state) { RETURN_IF_ERROR((function)(search_state, values, - *reinterpret_cast(pattern_val), result)); + *reinterpret_cast(pattern_val), result)); return Status::OK(); } Status FunctionLike::like_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, ColumnUInt8::Container& result) { + const StringRef& pattern, ColumnUInt8::Container& result) { std::string re_pattern; - convert_like_pattern(state, std::string(pattern.ptr, pattern.len), &re_pattern); + convert_like_pattern(state, std::string(pattern.data, pattern.size), &re_pattern); - return regexp_fn(state, val, {re_pattern.c_str(), (int)re_pattern.size()}, result); + return regexp_fn(state, val, {re_pattern.c_str(), re_pattern.size()}, result); } Status FunctionLike::like_fn_predicate(LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, ColumnUInt8::Container& result, + const StringRef& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz) { std::string re_pattern; - convert_like_pattern(state, std::string(pattern.ptr, pattern.len), &re_pattern); + convert_like_pattern(state, std::string(pattern.data, pattern.size), &re_pattern); - return regexp_fn_predicate(state, val, {re_pattern.c_str(), (int)re_pattern.size()}, result, - sel, sz); + return regexp_fn_predicate(state, val, {re_pattern.c_str(), re_pattern.size()}, result, sel, + sz); } -Status FunctionLike::like_fn_scalar(LikeSearchState* state, const StringValue& val, - const StringValue& pattern, unsigned char* result) { +Status FunctionLike::like_fn_scalar(LikeSearchState* state, const StringRef& val, + const StringRef& pattern, unsigned char* result) { std::string re_pattern; - convert_like_pattern(state, std::string(pattern.ptr, pattern.len), &re_pattern); + convert_like_pattern(state, std::string(pattern.data, pattern.size), &re_pattern); - return regexp_fn_scalar(state, StringRef(val.ptr, val.len), - {re_pattern.c_str(), (int)re_pattern.size()}, result); + return regexp_fn_scalar(state, StringRef(val.data, val.size), + {re_pattern.c_str(), re_pattern.size()}, result); } void FunctionLike::convert_like_pattern(LikeSearchState* state, const std::string& pattern, diff --git a/be/src/vec/functions/like.h b/be/src/vec/functions/like.h index b44831a93d..40235faafa 100644 --- a/be/src/vec/functions/like.h +++ b/be/src/vec/functions/like.h @@ -23,11 +23,11 @@ #include #include "runtime/string_search.hpp" -#include "runtime/string_value.h" #include "vec/columns/column_const.h" #include "vec/columns/column_set.h" #include "vec/columns/columns_number.h" #include "vec/columns/predicate_column.h" +#include "vec/common/string_ref.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_number.h" #include "vec/exprs/vexpr.h" @@ -41,7 +41,7 @@ namespace doris::vectorized { struct LikeSearchState { char escape_char; - /// Holds the string the StringValue points to and is set any time StringValue is + /// Holds the string the StringRef points to and is set any time StringRef is /// used. std::string search_string; @@ -97,15 +97,15 @@ struct LikeSearchState { } }; -using LikeFn = std::function; +using LikeFn = std::function; using LikePredicateFn = std::function&, const StringValue&, + LikeSearchState*, const PredicateColumnType&, const StringRef&, ColumnUInt8::Container&, uint16_t* sel, size_t sz)>; using ScalarLikeFn = std::function; + const StringRef&, unsigned char*)>; struct LikeState { LikeSearchState search_state; @@ -140,77 +140,76 @@ protected: ColumnUInt8::Container& result, LikeSearchState* search_state); static Status constant_starts_with_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, - ColumnUInt8::Container& result); + const StringRef& pattern, ColumnUInt8::Container& result); static Status constant_ends_with_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, ColumnUInt8::Container& result); + const StringRef& pattern, ColumnUInt8::Container& result); static Status constant_equals_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, ColumnUInt8::Container& result); + const StringRef& pattern, ColumnUInt8::Container& result); static Status constant_substring_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, ColumnUInt8::Container& result); + const StringRef& pattern, ColumnUInt8::Container& result); static Status constant_regex_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, ColumnUInt8::Container& result); + const StringRef& pattern, ColumnUInt8::Container& result); static Status regexp_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, ColumnUInt8::Container& result); + const StringRef& pattern, ColumnUInt8::Container& result); // These functions below are used only for predicate. static Status constant_regex_fn_predicate(LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, - ColumnUInt8::Container& result, uint16_t* sel, + const StringRef& pattern, + ColumnUInt8::Container& result, const uint16_t* sel, size_t sz); static Status regexp_fn_predicate(LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, ColumnUInt8::Container& result, - uint16_t* sel, size_t sz); + const StringRef& pattern, ColumnUInt8::Container& result, + const uint16_t* sel, size_t sz); static Status constant_starts_with_fn_predicate(LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, - ColumnUInt8::Container& result, uint16_t* sel, - size_t sz); + const StringRef& pattern, + ColumnUInt8::Container& result, + const uint16_t* sel, size_t sz); static Status constant_ends_with_fn_predicate(LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, - ColumnUInt8::Container& result, uint16_t* sel, - size_t sz); + const StringRef& pattern, + ColumnUInt8::Container& result, + const uint16_t* sel, size_t sz); static Status constant_equals_fn_predicate(LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, - ColumnUInt8::Container& result, uint16_t* sel, + const StringRef& pattern, + ColumnUInt8::Container& result, const uint16_t* sel, size_t sz); static Status constant_substring_fn_predicate(LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, - ColumnUInt8::Container& result, uint16_t* sel, - size_t sz); + const StringRef& pattern, + ColumnUInt8::Container& result, + const uint16_t* sel, size_t sz); static Status constant_starts_with_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringValue& pattern, unsigned char* result); + const StringRef& pattern, unsigned char* result); static Status constant_ends_with_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringValue& pattern, unsigned char* result); + const StringRef& pattern, unsigned char* result); static Status constant_equals_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringValue& pattern, unsigned char* result); + const StringRef& pattern, unsigned char* result); static Status constant_substring_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringValue& pattern, unsigned char* result); + const StringRef& pattern, unsigned char* result); static Status constant_regex_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringValue& pattern, unsigned char* result); + const StringRef& pattern, unsigned char* result); static Status regexp_fn_scalar(LikeSearchState* state, const StringRef& val, - const StringValue& pattern, unsigned char* result); + const StringRef& pattern, unsigned char* result); // hyperscan compile expression to database and allocate scratch space static Status hs_prepare(FunctionContext* context, const char* expression, @@ -230,16 +229,16 @@ public: friend struct LikeSearchState; private: - static Status like_fn(LikeSearchState* state, const ColumnString& val, - const StringValue& pattern, ColumnUInt8::Container& result); + static Status like_fn(LikeSearchState* state, const ColumnString& val, const StringRef& pattern, + ColumnUInt8::Container& result); static Status like_fn_predicate(LikeSearchState* state, const PredicateColumnType& val, - const StringValue& pattern, ColumnUInt8::Container& result, + const StringRef& pattern, ColumnUInt8::Container& result, uint16_t* sel, size_t sz); - static Status like_fn_scalar(LikeSearchState* state, const StringValue& val, - const StringValue& pattern, unsigned char* result); + static Status like_fn_scalar(LikeSearchState* state, const StringRef& val, + const StringRef& pattern, unsigned char* result); static void convert_like_pattern(LikeSearchState* state, const std::string& pattern, std::string* re_pattern); diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h index 990b67fd62..5c848188c5 100644 --- a/be/src/vec/io/io_helper.h +++ b/be/src/vec/io/io_helper.h @@ -37,7 +37,7 @@ namespace doris::vectorized { // Define in the namespace and avoid defining global macros, -// because it maybe conflict with other libs +// because it maybe conflicts with other libs static constexpr size_t DEFAULT_MAX_STRING_SIZE = 1073741824; // 1GB static constexpr size_t DEFAULT_MAX_JSON_SIZE = 1073741824; // 1GB static constexpr auto WRITE_HELPERS_MAX_INT_WIDTH = 40U; @@ -134,7 +134,7 @@ inline void write_string_binary(const StringRef& s, BufferWritable& buf) { } inline void write_string_binary(const char* s, BufferWritable& buf) { - write_string_binary(StringRef {s}, buf); + write_string_binary(StringRef {std::string(s)}, buf); } inline void write_json_binary(JsonbField s, BufferWritable& buf) { diff --git a/be/src/vec/olap/vertical_merge_iterator.cpp b/be/src/vec/olap/vertical_merge_iterator.cpp index 7523dc3237..605e110e06 100644 --- a/be/src/vec/olap/vertical_merge_iterator.cpp +++ b/be/src/vec/olap/vertical_merge_iterator.cpp @@ -28,11 +28,11 @@ RowSource::RowSource(uint16_t source_num, bool agg_flag) { _data = agg_flag ? (_data | AGG_FLAG) : (_data & SOURCE_FLAG); } -uint16_t RowSource::get_source_num() { +uint16_t RowSource::get_source_num() const { return _data & SOURCE_FLAG; } -bool RowSource::agg_flag() { +bool RowSource::agg_flag() const { return (_data & AGG_FLAG) != 0; } @@ -605,8 +605,7 @@ std::shared_ptr new_vertical_heap_merge_iterator( std::shared_ptr new_vertical_mask_merge_iterator( const std::vector& inputs, size_t ori_return_cols, RowSourcesBuffer* row_sources) { - return std::make_shared(std::move(inputs), ori_return_cols, - row_sources); + return std::make_shared(inputs, ori_return_cols, row_sources); } } // namespace vectorized diff --git a/be/src/vec/olap/vertical_merge_iterator.h b/be/src/vec/olap/vertical_merge_iterator.h index 6ad5613f36..a7712192b4 100644 --- a/be/src/vec/olap/vertical_merge_iterator.h +++ b/be/src/vec/olap/vertical_merge_iterator.h @@ -37,8 +37,8 @@ public: RowSource(uint16_t data) : _data(data) {} RowSource(uint16_t source_num, bool agg_flag); - uint16_t get_source_num(); - bool agg_flag(); + uint16_t get_source_num() const; + bool agg_flag() const; void set_agg_flag(bool agg_flag); uint16_t data() const; diff --git a/be/src/vec/runtime/vfile_result_writer.cpp b/be/src/vec/runtime/vfile_result_writer.cpp index c2778543ba..256bb8bf19 100644 --- a/be/src/vec/runtime/vfile_result_writer.cpp +++ b/be/src/vec/runtime/vfile_result_writer.cpp @@ -28,11 +28,11 @@ #include "runtime/descriptors.h" #include "runtime/large_int_value.h" #include "runtime/runtime_state.h" -#include "runtime/string_value.h" #include "service/backend_options.h" #include "util/file_utils.h" #include "util/mysql_global.h" #include "util/mysql_row_buffer.h" +#include "vec/common/string_ref.h" #include "vec/core/block.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" diff --git a/be/src/vec/runtime/vorc_writer.cpp b/be/src/vec/runtime/vorc_writer.cpp index 4129a86831..5737d0bf94 100644 --- a/be/src/vec/runtime/vorc_writer.cpp +++ b/be/src/vec/runtime/vorc_writer.cpp @@ -158,12 +158,12 @@ void VOrcWriterWrapper::close() { cur_batch->notNull[row_id] = 1; \ int len = binary_cast( \ assert_cast&>(*col).get_data()[row_id]) \ - .to_buffer(buffer.ptr); \ + .to_buffer((char*)buffer.ptr); \ while (buffer.len < offset + len) { \ char* new_ptr = (char*)malloc(buffer.len + BUFFER_UNIT_SIZE); \ memcpy(new_ptr, buffer.ptr, buffer.len); \ free(buffer.ptr); \ - buffer.ptr = new_ptr; \ + buffer.ptr = (uint8_t*)new_ptr; \ buffer.len = buffer.len + BUFFER_UNIT_SIZE; \ } \ cur_batch->length[row_id] = len; \ @@ -175,7 +175,7 @@ void VOrcWriterWrapper::close() { if (null_data[row_id] != 0) { \ cur_batch->notNull[row_id] = 0; \ } else { \ - cur_batch->data[row_id] = buffer.ptr + offset; \ + cur_batch->data[row_id] = (char*)buffer.ptr + offset; \ offset += cur_batch->length[row_id]; \ } \ } \ @@ -183,12 +183,12 @@ void VOrcWriterWrapper::close() { check_and_get_column>(col)) { \ for (size_t row_id = 0; row_id < sz; row_id++) { \ int len = binary_cast(not_null_column->get_data()[row_id]) \ - .to_buffer(buffer.ptr); \ + .to_buffer((char*)buffer.ptr); \ while (buffer.len < offset + len) { \ char* new_ptr = (char*)malloc(buffer.len + BUFFER_UNIT_SIZE); \ memcpy(new_ptr, buffer.ptr, buffer.len); \ free(buffer.ptr); \ - buffer.ptr = new_ptr; \ + buffer.ptr = (uint8_t*)new_ptr; \ buffer.len = buffer.len + BUFFER_UNIT_SIZE; \ } \ cur_batch->length[row_id] = len; \ @@ -196,7 +196,7 @@ void VOrcWriterWrapper::close() { } \ offset = 0; \ for (size_t row_id = 0; row_id < sz; row_id++) { \ - cur_batch->data[row_id] = buffer.ptr + offset; \ + cur_batch->data[row_id] = (char*)buffer.ptr + offset; \ offset += cur_batch->length[row_id]; \ } \ } else { \ @@ -258,7 +258,7 @@ Status VOrcWriterWrapper::write(const Block& block) { // Buffer used by date type char* ptr = (char*)malloc(BUFFER_UNIT_SIZE); - StringValue buffer(ptr, BUFFER_UNIT_SIZE); + ::StringVal buffer(ptr, BUFFER_UNIT_SIZE); size_t sz = block.rows(); auto row_batch = _create_row_batch(sz); @@ -347,12 +347,12 @@ Status VOrcWriterWrapper::write(const Block& block) { int len = binary_cast>( assert_cast&>(*col) .get_data()[row_id]) - .to_buffer(buffer.ptr, output_scale); + .to_buffer((char*)buffer.ptr, output_scale); while (buffer.len < offset + len) { char* new_ptr = (char*)malloc(buffer.len + BUFFER_UNIT_SIZE); memcpy(new_ptr, buffer.ptr, buffer.len); free(buffer.ptr); - buffer.ptr = new_ptr; + buffer.ptr = (uint8_t*)new_ptr; buffer.len = buffer.len + BUFFER_UNIT_SIZE; } cur_batch->length[row_id] = len; @@ -364,7 +364,7 @@ Status VOrcWriterWrapper::write(const Block& block) { if (null_data[row_id] != 0) { cur_batch->notNull[row_id] = 0; } else { - cur_batch->data[row_id] = buffer.ptr + offset; + cur_batch->data[row_id] = (char*)buffer.ptr + offset; offset += cur_batch->length[row_id]; } } @@ -374,12 +374,12 @@ Status VOrcWriterWrapper::write(const Block& block) { int output_scale = _output_vexpr_ctxs[i]->root()->type().scale; int len = binary_cast>( not_null_column->get_data()[row_id]) - .to_buffer(buffer.ptr, output_scale); + .to_buffer((char*)buffer.ptr, output_scale); while (buffer.len < offset + len) { char* new_ptr = (char*)malloc(buffer.len + BUFFER_UNIT_SIZE); memcpy(new_ptr, buffer.ptr, buffer.len); free(buffer.ptr); - buffer.ptr = new_ptr; + buffer.ptr = (uint8_t*)new_ptr; buffer.len = buffer.len + BUFFER_UNIT_SIZE; } cur_batch->length[row_id] = len; @@ -387,7 +387,7 @@ Status VOrcWriterWrapper::write(const Block& block) { } offset = 0; for (size_t row_id = 0; row_id < sz; row_id++) { - cur_batch->data[row_id] = buffer.ptr + offset; + cur_batch->data[row_id] = (char*)buffer.ptr + offset; offset += cur_batch->length[row_id]; } } else { diff --git a/be/test/exprs/bitmap_function_test.cpp b/be/test/exprs/bitmap_function_test.cpp index d76e30f333..aa3150995d 100644 --- a/be/test/exprs/bitmap_function_test.cpp +++ b/be/test/exprs/bitmap_function_test.cpp @@ -310,8 +310,7 @@ TEST_F(BitmapFunctionsTest, test_bitmap_intersect) { date_time_value.to_datetime_val(&datetime2); test_bitmap_intersect(ctx, datetime1, datetime2); - test_bitmap_intersect(ctx, StringVal("20191211"), - StringVal("20191212")); + test_bitmap_intersect(ctx, StringVal("20191211"), StringVal("20191212")); } TEST_F(BitmapFunctionsTest, bitmap_or) { diff --git a/be/test/exprs/bloom_filter_predicate_test.cpp b/be/test/exprs/bloom_filter_predicate_test.cpp index 3f2fec11bc..ef87562b86 100644 --- a/be/test/exprs/bloom_filter_predicate_test.cpp +++ b/be/test/exprs/bloom_filter_predicate_test.cpp @@ -20,7 +20,7 @@ #include "exprs/bloomfilter_predicate.h" #include "exprs/create_predicate_function.h" #include "gtest/gtest.h" -#include "runtime/string_value.h" +#include "vec/common/string_ref.h" namespace doris { class BloomFilterPredicateTest : public testing::Test { @@ -55,10 +55,10 @@ TEST_F(BloomFilterPredicateTest, bloom_filter_func_stringval_test) { EXPECT_TRUE(func->init(1024, 0.05).ok()); ObjectPool obj_pool; const int data_size = 1024; - StringValue data[data_size]; + StringRef data[data_size]; for (int i = 0; i < data_size; i++) { auto str = obj_pool.add(new std::string(std::to_string(i))); - data[i] = StringValue(*str); + data[i] = StringRef(*str); func->insert((const void*)&data[i]); } for (int i = 0; i < data_size; i++) { @@ -66,7 +66,7 @@ TEST_F(BloomFilterPredicateTest, bloom_filter_func_stringval_test) { } // test not exist value std::string not_exist_str = "0x3355ff"; - StringValue not_exist_val(not_exist_str); + StringRef not_exist_val(not_exist_str); EXPECT_FALSE(func->find((const void*)¬_exist_val)); // test fixed char @@ -74,24 +74,24 @@ TEST_F(BloomFilterPredicateTest, bloom_filter_func_stringval_test) { EXPECT_TRUE(func->init(1024, 0.05).ok()); auto varchar_true_str = obj_pool.add(new std::string("true")); - StringValue varchar_true(*varchar_true_str); + StringRef varchar_true(*varchar_true_str); func->insert((const void*)&varchar_true); auto varchar_false_str = obj_pool.add(new std::string("false")); - StringValue varchar_false(*varchar_false_str); + StringRef varchar_false(*varchar_false_str); func->insert((const void*)&varchar_false); - StringValue fixed_char_true; + StringRef fixed_char_true; char true_buf[100] = "true"; memset(true_buf + strlen(true_buf), 0, 100 - strlen(true_buf)); - fixed_char_true.ptr = true_buf; - fixed_char_true.len = 10; + fixed_char_true.data = true_buf; + fixed_char_true.size = 10; - StringValue fixed_char_false; + StringRef fixed_char_false; char false_buf[100] = "false"; memset(false_buf + strlen(false_buf), 0, 100 - strlen(false_buf)); - fixed_char_false.ptr = false_buf; - fixed_char_false.len = 10; + fixed_char_false.data = false_buf; + fixed_char_false.size = 10; EXPECT_TRUE(func->find_olap_engine((const void*)&fixed_char_true)); EXPECT_TRUE(func->find_olap_engine((const void*)&fixed_char_false)); diff --git a/be/test/exprs/hybrid_set_test.cpp b/be/test/exprs/hybrid_set_test.cpp index 83e05c6ce3..33ea07d2f1 100644 --- a/be/test/exprs/hybrid_set_test.cpp +++ b/be/test/exprs/hybrid_set_test.cpp @@ -278,52 +278,52 @@ TEST_F(HybridSetTest, double) { } TEST_F(HybridSetTest, string) { HybridSetBase* set = create_set(TYPE_VARCHAR); - StringValue a; + StringRef a; char buf[100]; snprintf(buf, 100, "abcdefghigk"); - a.ptr = buf; + a.data = buf; - a.len = 0; + a.size = 0; set->insert(&a); - a.len = 1; + a.size = 1; set->insert(&a); - a.len = 2; + a.size = 2; set->insert(&a); - a.len = 3; + a.size = 3; set->insert(&a); - a.len = 4; + a.size = 4; set->insert(&a); - a.len = 4; + a.size = 4; set->insert(&a); EXPECT_EQ(5, set->size()); HybridSetBase::IteratorBase* base = set->begin(); while (base->has_next()) { - LOG(INFO) << ((StringValue*)base->get_value())->ptr; + LOG(INFO) << ((StringRef*)base->get_value())->data; base->next(); } - StringValue b; + StringRef b; char buf1[100]; snprintf(buf1, 100, "abcdefghigk"); - b.ptr = buf1; + b.data = buf1; - b.len = 0; + b.size = 0; EXPECT_TRUE(set->find(&b)); - b.len = 1; + b.size = 1; EXPECT_TRUE(set->find(&b)); - b.len = 2; + b.size = 2; EXPECT_TRUE(set->find(&b)); - b.len = 3; + b.size = 3; EXPECT_TRUE(set->find(&b)); - b.len = 4; + b.size = 4; EXPECT_TRUE(set->find(&b)); - b.len = 5; + b.size = 5; EXPECT_FALSE(set->find(&b)); } TEST_F(HybridSetTest, timestamp) { diff --git a/be/test/olap/block_column_predicate_test.cpp b/be/test/olap/block_column_predicate_test.cpp index 49d7c30e5a..b1fc0985d1 100644 --- a/be/test/olap/block_column_predicate_test.cpp +++ b/be/test/olap/block_column_predicate_test.cpp @@ -25,8 +25,8 @@ #include "olap/field.h" #include "olap/wrapper_field.h" #include "runtime/mem_pool.h" -#include "runtime/string_value.hpp" #include "vec/columns/predicate_column.h" +#include "vec/common/string_ref.h" namespace doris { diff --git a/be/test/runtime/string_buffer_test.cpp b/be/test/runtime/string_buffer_test.cpp index 2064b282c9..2e8d7a1b33 100644 --- a/be/test/runtime/string_buffer_test.cpp +++ b/be/test/runtime/string_buffer_test.cpp @@ -30,7 +30,7 @@ void validate_string(const std::string& std_str, const StringBuffer& str) { EXPECT_EQ((int)std_str.size(), str.size()); if (std_str.size() > 0) { - EXPECT_EQ(strncmp(std_str.c_str(), str.str().ptr, std_str.size()), 0); + EXPECT_EQ(strncmp(std_str.c_str(), str.str().data, std_str.size()), 0); } } diff --git a/be/test/runtime/string_value_test.cpp b/be/test/runtime/string_value_test.cpp index 5e063929b6..6d41d35bff 100644 --- a/be/test/runtime/string_value_test.cpp +++ b/be/test/runtime/string_value_test.cpp @@ -15,25 +15,24 @@ // specific language governing permissions and limitations // under the License. -#include "runtime/string_value.hpp" - #include #include #include "util/cpu_info.h" +#include "vec/common/string_ref.h" using std::string; namespace doris { -StringValue FromStdString(const string& str) { +StringRef FromStdString(const string& str) { char* ptr = const_cast(str.c_str()); int len = str.size(); - return StringValue(ptr, len); + return StringRef(ptr, len); } -TEST(StringValueTest, TestCompare) { +TEST(StringRefTest, TestCompare) { std::string empty_str = ""; std::string str1_str = "abc"; std::string str2_str = "abcdef"; @@ -42,7 +41,7 @@ TEST(StringValueTest, TestCompare) { const int NUM_STRINGS = 4; // Must be in lexical order - StringValue svs[NUM_STRINGS]; + StringRef svs[NUM_STRINGS]; svs[0] = FromStdString(empty_str); svs[1] = FromStdString(str1_str); svs[2] = FromStdString(str2_str); diff --git a/be/test/util/array_parser_test.cpp b/be/test/util/array_parser_test.cpp index c88ef91753..1b0f93823b 100644 --- a/be/test/util/array_parser_test.cpp +++ b/be/test/util/array_parser_test.cpp @@ -22,8 +22,8 @@ #include "olap/tablet_schema.h" #include "olap/types.h" -#include "runtime/string_value.h" #include "testutil/array_utils.h" +#include "vec/common/string_ref.h" namespace doris { @@ -81,7 +81,7 @@ TEST(ArrayParserTest, TestParseVarcharArray) { char data[] = {'a', 'b', 'c'}; int num_items = sizeof(data) / sizeof(data[0]); - StringValue string_values[] = { + StringRef string_values[] = { {&data[0], 1}, {&data[1], 1}, {&data[2], 1}, diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index a4de5e9129..991a82229e 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -26,13 +26,13 @@ #include "agent/be_exec_version_manager.h" #include "exec/schema_scanner.h" #include "gen_cpp/data.pb.h" -#include "runtime/string_value.h" #include "runtime/tuple_row.h" #include "vec/columns/column_array.h" #include "vec/columns/column_decimal.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" +#include "vec/common/string_ref.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_bitmap.h" diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp b/be/test/vec/exec/parquet/parquet_thrift_test.cpp index f41b2da5d9..c838d11b92 100644 --- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp +++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp @@ -26,9 +26,9 @@ #include "io/buffered_reader.h" #include "io/fs/local_file_system.h" #include "olap/iterators.h" -#include "runtime/string_value.h" #include "util/runtime_profile.h" #include "util/timezone_utils.h" +#include "vec/common/string_ref.h" #include "vec/core/block.h" #include "vec/core/column_with_type_and_name.h" #include "vec/data_types/data_type_factory.hpp" @@ -235,14 +235,14 @@ static void create_block(std::unique_ptr& block) { {"boolean_col", TYPE_BOOLEAN, sizeof(bool), true}, {"float_col", TYPE_FLOAT, sizeof(float_t), true}, {"double_col", TYPE_DOUBLE, sizeof(double_t), true}, - {"string_col", TYPE_STRING, sizeof(StringValue), true}, + {"string_col", TYPE_STRING, sizeof(StringRef), true}, // binary is not supported, use string instead - {"binary_col", TYPE_STRING, sizeof(StringValue), true}, + {"binary_col", TYPE_STRING, sizeof(StringRef), true}, // 64-bit-length, see doris::get_slot_size in primitive_type.cpp {"timestamp_col", TYPE_DATETIME, sizeof(DateTimeValue), true}, {"decimal_col", TYPE_DECIMALV2, sizeof(DecimalV2Value), true}, - {"char_col", TYPE_CHAR, sizeof(StringValue), true}, - {"varchar_col", TYPE_VARCHAR, sizeof(StringValue), true}, + {"char_col", TYPE_CHAR, sizeof(StringRef), true}, + {"varchar_col", TYPE_VARCHAR, sizeof(StringRef), true}, {"date_col", TYPE_DATE, sizeof(DateTimeValue), true}, {"date_v2_col", TYPE_DATEV2, sizeof(uint32_t), true}, {"timestamp_v2_col", TYPE_DATETIMEV2, sizeof(DateTimeValue), true, 18, 0}}; @@ -355,12 +355,12 @@ TEST_F(ParquetThriftReaderTest, group_reader) { {"boolean_col", TYPE_BOOLEAN, sizeof(bool), true}, {"float_col", TYPE_FLOAT, sizeof(float_t), true}, {"double_col", TYPE_DOUBLE, sizeof(double_t), true}, - {"string_col", TYPE_STRING, sizeof(StringValue), true}, - {"binary_col", TYPE_STRING, sizeof(StringValue), true}, + {"string_col", TYPE_STRING, sizeof(StringRef), true}, + {"binary_col", TYPE_STRING, sizeof(StringRef), true}, {"timestamp_col", TYPE_DATETIME, sizeof(DateTimeValue), true}, {"decimal_col", TYPE_DECIMALV2, sizeof(DecimalV2Value), true}, - {"char_col", TYPE_CHAR, sizeof(StringValue), true}, - {"varchar_col", TYPE_VARCHAR, sizeof(StringValue), true}, + {"char_col", TYPE_CHAR, sizeof(StringRef), true}, + {"varchar_col", TYPE_VARCHAR, sizeof(StringRef), true}, {"date_col", TYPE_DATE, sizeof(DateTimeValue), true}}; int num_cols = sizeof(column_descs) / sizeof(SchemaScanner::ColumnDesc); SchemaScanner schema_scanner(column_descs, num_cols); diff --git a/gensrc/script/gen_functions.py b/gensrc/script/gen_functions.py index 303e9fda02..bdc83e4b1d 100755 --- a/gensrc/script/gen_functions.py +++ b/gensrc/script/gen_functions.py @@ -585,7 +585,7 @@ cc_preamble = '\ #include "gen_cpp/opcode/functions.h"\n\ #include "exprs/expr.h"\n\ #include "exprs/case_expr.h"\n\ -#include "runtime/string_value.hpp"\n\ +#include "vec/common/string_tmp.h"\n\ #include "runtime/tuple_row.h"\n\ #include "util/mysql_dtoa.h"\n\ #include "util/string_parser.hpp"\n\ diff --git a/gensrc/script/gen_vector_functions.py b/gensrc/script/gen_vector_functions.py index f77742b769..073e2f8655 100755 --- a/gensrc/script/gen_vector_functions.py +++ b/gensrc/script/gen_vector_functions.py @@ -393,7 +393,7 @@ cc_preamble = '\ #include "exprs/case_expr.h"\n\ #include "exprs/expr.h"\n\ #include "exprs/in_predicate.h"\n\ -#include "runtime/string_value.hpp"\n\ +#include "vec/common/string_tmp.h"\n\ #include "runtime/vectorized_row_batch.h"\n\ #include "util/string_parser.hpp"\n\ #include \n\