From 8738ce380b8abbaa19a80b8b7364e155be4ac2b6 Mon Sep 17 00:00:00 2001 From: Zhengguo Yang Date: Wed, 18 Aug 2021 09:05:40 +0800 Subject: [PATCH] Add long text type STRING, with a maximum length of 2GB. Usage is similar to varchar, and there is no guarantee for the performance of storing extremely long data (#6391) --- be/CMakeLists.txt | 17 +- be/src/exec/es/es_predicate.cpp | 5 +- be/src/exec/es/es_scroll_parser.cpp | 3 +- be/src/exec/es_scan_node.cpp | 6 +- be/src/exec/merge_join_node.cpp | 1 + be/src/exec/odbc_connector.cpp | 5 +- be/src/exec/olap_rewrite_node.cpp | 12 +- be/src/exec/olap_scan_node.cpp | 9 +- be/src/exec/olap_scanner.cpp | 3 +- be/src/exec/olap_utils.h | 1 + be/src/exec/parquet_writer.cpp | 3 +- be/src/exec/partitioned_hash_table.cc | 2 +- .../schema_scanner/schema_columns_scanner.cpp | 9 +- be/src/exec/tablet_sink.cpp | 15 +- be/src/exec/text_converter.hpp | 3 +- be/src/exprs/agg_fn_evaluator.cpp | 8 +- be/src/exprs/anyval_util.cpp | 5 + be/src/exprs/anyval_util.h | 3 + be/src/exprs/binary_predicate.cpp | 7 + be/src/exprs/bloomfilter_predicate.cpp | 2 + be/src/exprs/case_expr.cpp | 2 + be/src/exprs/expr.cpp | 8 +- be/src/exprs/expr_context.cpp | 3 +- be/src/exprs/hybrid_set.cpp | 1 + be/src/exprs/literal.cpp | 1 + be/src/exprs/new_agg_fn_evaluator.cc | 5 +- be/src/exprs/runtime_filter.cpp | 16 +- be/src/olap/aggregate_func.cpp | 5 + be/src/olap/aggregate_func.h | 24 + be/src/olap/column_vector.cpp | 4 + be/src/olap/delete_handler.cpp | 1 + be/src/olap/field.h | 153 +++-- be/src/olap/key_coder.cpp | 1 + be/src/olap/key_coder.h | 29 + be/src/olap/olap_common.h | 20 +- be/src/olap/olap_define.h | 10 +- be/src/olap/olap_index.cpp | 33 +- be/src/olap/push_handler.cpp | 49 +- be/src/olap/reader.cpp | 8 +- be/src/olap/row_block.cpp | 2 +- be/src/olap/row_block2.cpp | 11 +- be/src/olap/row_block2.h | 1 + be/src/olap/row_cursor.cpp | 70 ++- be/src/olap/row_cursor.h | 7 +- be/src/olap/rowset/column_data.cpp | 2 +- be/src/olap/rowset/column_reader.cpp | 
3 +- be/src/olap/rowset/column_reader.h | 3 +- be/src/olap/rowset/column_writer.cpp | 6 +- be/src/olap/rowset/segment_group.cpp | 4 +- .../rowset/segment_v2/binary_dict_page.cpp | 1 + .../rowset/segment_v2/bitmap_index_writer.cpp | 3 + .../segment_v2/bloom_filter_index_writer.cpp | 5 +- .../olap/rowset/segment_v2/column_reader.cpp | 3 +- .../olap/rowset/segment_v2/encoding_info.cpp | 4 + be/src/olap/schema.h | 2 + be/src/olap/schema_change.cpp | 28 +- be/src/olap/stream_index_common.cpp | 3 +- be/src/olap/tablet_meta.cpp | 3 +- be/src/olap/tablet_schema.cpp | 7 + be/src/olap/types.cpp | 2 + be/src/olap/types.h | 91 ++- be/src/olap/wrapper_field.cpp | 17 +- be/src/runtime/dpp_sink.cpp | 5 +- be/src/runtime/dpp_writer.cpp | 1 + be/src/runtime/export_sink.cpp | 3 +- be/src/runtime/file_result_writer.cpp | 3 +- be/src/runtime/fold_constant_mgr.cpp | 1 + be/src/runtime/mysql_result_writer.cpp | 3 +- be/src/runtime/mysql_table_writer.cpp | 3 +- be/src/runtime/primitive_type.cpp | 13 + be/src/runtime/primitive_type.h | 39 +- be/src/runtime/raw_value.cpp | 13 +- be/src/runtime/raw_value.h | 11 +- be/src/runtime/raw_value_ir.cpp | 1 + be/src/runtime/types.h | 16 +- be/src/runtime/vectorized_row_batch.cpp | 4 +- be/src/service/CMakeLists.txt | 23 +- be/src/util/arrow/row_batch.cpp | 4 +- be/src/util/symbols_util.cpp | 1 + be/test/olap/column_reader_test.cpp | 572 ++++++++++++------ be/test/olap/row_cursor_test.cpp | 6 +- docs/.vuepress/sidebar/en.js | 1 + docs/.vuepress/sidebar/zh-CN.js | 1 + .../sql-statements/Data Types/STRING.md | 35 ++ .../sql-statements/Data Types/VARCHAR.md | 2 +- .../sql-statements/Data Types/STRING.md | 36 ++ fe/fe-core/src/main/cup/sql_parser.cup | 4 +- .../org/apache/doris/alter/AlterOpType.java | 2 +- .../doris/analysis/BinaryPredicate.java | 9 +- .../org/apache/doris/analysis/ColumnDef.java | 1 + .../apache/doris/analysis/OutFileClause.java | 4 +- .../org/apache/doris/analysis/TypeDef.java | 4 +- .../org/apache/doris/catalog/ColumnType.java | 10 
+ .../org/apache/doris/catalog/Function.java | 2 + .../apache/doris/catalog/PrimitiveType.java | 67 +- .../apache/doris/catalog/ScalarFunction.java | 1 + .../org/apache/doris/catalog/ScalarType.java | 57 +- .../org/apache/doris/catalog/SchemaTable.java | 6 - .../java/org/apache/doris/catalog/Type.java | 43 +- .../org/apache/doris/common/util/Util.java | 1 + .../org/apache/doris/httpv2/HttpServer.java | 1 + .../apache/doris/planner/OlapScanNode.java | 2 +- .../apache/doris/qe/cache/PartitionRange.java | 1 + fe/fe-core/src/main/resources/doris-logo.png | Bin 0 -> 3304 bytes gensrc/proto/internal_service.proto | 1 + gensrc/thrift/Types.thrift | 3 +- 106 files changed, 1353 insertions(+), 433 deletions(-) create mode 100644 docs/en/sql-reference/sql-statements/Data Types/STRING.md create mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Types/STRING.md create mode 100644 fe/fe-core/src/main/resources/doris-logo.png diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index a4ec87d7f9..62593407ae 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -459,11 +459,17 @@ set(DORIS_LINK_LIBS Util DorisGen Webserver - TestUtil Geo Plugin ${WL_END_GROUP} ) +if (${MAKE_TEST} STREQUAL "ON") + set(DORIS_LINK_LIBS + ${DORIS_LINK_LIBS} + TestUtil + ) +endif() + # COMMON_THIRDPARTY are thirdparty dependencies that can run on all platform # When adding new dependencies, If you don’t know if it can run on all platforms, @@ -611,6 +617,7 @@ if (${MAKE_TEST} STREQUAL "ON") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -DGTEST_USE_OWN_TR1_TUPLE=0") SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-arcs -ftest-coverage -lgcov") add_definitions(-DBE_TEST) + add_subdirectory(${SRC_DIR}/testutil) endif () add_subdirectory(${SRC_DIR}/agent) @@ -625,10 +632,12 @@ add_subdirectory(${SRC_DIR}/http) add_subdirectory(${SRC_DIR}/olap) add_subdirectory(${SRC_DIR}/runtime) add_subdirectory(${SRC_DIR}/service) -add_subdirectory(${SRC_DIR}/testutil) 
-#add_subdirectory(${SRC_DIR}/tools) add_subdirectory(${SRC_DIR}/udf) -add_subdirectory(${SRC_DIR}/tools) + +if (${MAKE_TEST} STREQUAL "OFF") + add_subdirectory(${SRC_DIR}/tools) +endif() + add_subdirectory(${SRC_DIR}/util) add_subdirectory(${SRC_DIR}/plugin) diff --git a/be/src/exec/es/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp index 3a73f15fc9..72eb6faa3f 100644 --- a/be/src/exec/es/es_predicate.cpp +++ b/be/src/exec/es/es_predicate.cpp @@ -79,6 +79,7 @@ std::string ExtLiteral::value_to_string() { break; case TYPE_CHAR: case TYPE_VARCHAR: + case TYPE_STRING: ss << get_string(); break; case TYPE_DATE: @@ -134,7 +135,7 @@ double ExtLiteral::get_double() { } std::string ExtLiteral::get_string() { - DCHECK(_type == TYPE_VARCHAR || _type == TYPE_CHAR); + DCHECK(_type == TYPE_VARCHAR || _type == TYPE_CHAR || _type == TYPE_STRING); return (reinterpret_cast(_value))->to_string(); } @@ -331,7 +332,7 @@ Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { } PrimitiveType type = expr->type().type; - if (type != TYPE_VARCHAR && type != TYPE_CHAR) { + if (type != TYPE_VARCHAR && type != TYPE_CHAR && type != TYPE_STRING) { return Status::InternalError("build disjuncts failed: like value is not a string"); } std::string col = slot_desc->col_name(); diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index 9be212a8de..44ca493e63 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -327,7 +327,8 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple, } switch (type) { case TYPE_CHAR: - case TYPE_VARCHAR: { + case TYPE_VARCHAR: + case TYPE_STRING: { // sometimes elasticsearch user post some not-string value to Elasticsearch Index. 
// because of reading value from _source, we can not process all json type and then just transfer the value to original string representation // this may be a tricky, but we can workaround this issue diff --git a/be/src/exec/es_scan_node.cpp b/be/src/exec/es_scan_node.cpp index 86116a3c96..563bb25b07 100644 --- a/be/src/exec/es_scan_node.cpp +++ b/be/src/exec/es_scan_node.cpp @@ -645,7 +645,8 @@ bool EsScanNode::to_ext_literal(PrimitiveType slot_type, void* value, TExtLitera } case TYPE_CHAR: - case TYPE_VARCHAR: { + case TYPE_VARCHAR: + case TYPE_STRING: { node_type = (TExprNodeType::STRING_LITERAL); TStringLiteral string_literal; string_literal.__set_value((reinterpret_cast(value))->debug_string()); @@ -762,7 +763,8 @@ Status EsScanNode::materialize_row(MemPool* tuple_pool, Tuple* tuple, int val_idx = cols_next_val_idx[i]++; switch (slot_desc->type().type) { case TYPE_CHAR: - case TYPE_VARCHAR: { + case TYPE_VARCHAR: + case TYPE_STRING: { if (val_idx >= col.string_vals.size()) { return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "STRING")); } diff --git a/be/src/exec/merge_join_node.cpp b/be/src/exec/merge_join_node.cpp index 93618ea43d..f972ab5f4d 100644 --- a/be/src/exec/merge_join_node.cpp +++ b/be/src/exec/merge_join_node.cpp @@ -103,6 +103,7 @@ Status MergeJoinNode::prepare(RuntimeState* state) { case TYPE_CHAR: case TYPE_VARCHAR: + case TYPE_STRING: _cmp_func.push_back(compare_value); break; diff --git a/be/src/exec/odbc_connector.cpp b/be/src/exec/odbc_connector.cpp index 0bfd579ebc..2294ba7cf5 100644 --- a/be/src/exec/odbc_connector.cpp +++ b/be/src/exec/odbc_connector.cpp @@ -147,7 +147,7 @@ Status ODBCConnector::query() { DataBinding* column_data = new DataBinding; column_data->target_type = SQL_C_CHAR; auto type = _tuple_desc->slots()[i]->type().type; - column_data->buffer_length = (type == TYPE_HLL || type == TYPE_CHAR || type == TYPE_VARCHAR) + column_data->buffer_length = (type == TYPE_HLL || type == TYPE_CHAR || type == 
TYPE_VARCHAR || type == TYPE_STRING) ? BIG_COLUMN_SIZE_BUFFER : SMALL_COLUMN_SIZE_BUFFER; column_data->target_value_ptr = malloc(sizeof(char) * column_data->buffer_length); @@ -254,7 +254,8 @@ Status ODBCConnector::append(const std::string& table_name, RowBatch* batch, break; } case TYPE_VARCHAR: - case TYPE_CHAR: { + case TYPE_CHAR: + case TYPE_STRING: { const auto* string_val = (const StringValue*)(item); if (string_val->ptr == NULL) { diff --git a/be/src/exec/olap_rewrite_node.cpp b/be/src/exec/olap_rewrite_node.cpp index dedea4b566..449f53a115 100644 --- a/be/src/exec/olap_rewrite_node.cpp +++ b/be/src/exec/olap_rewrite_node.cpp @@ -135,7 +135,8 @@ bool OlapRewriteNode::copy_one_row(TupleRow* src_row, Tuple* tuple, MemPool* poo const TColumnType& column_type = _column_types[i]; switch (column_type.type) { case TPrimitiveType::CHAR: - case TPrimitiveType::VARCHAR: { + case TPrimitiveType::VARCHAR: + case TPrimitiveType::STRING: { // Fixed length string StringValue* str_val = (StringValue*)src_value; if (str_val->len > column_type.len) { @@ -145,8 +146,7 @@ bool OlapRewriteNode::copy_one_row(TupleRow* src_row, Tuple* tuple, MemPool* poo << "schema length: " << column_type.len << "; " << "actual length: " << str_val->len << "; "; return false; - } - StringValue* dst_val = (StringValue*)tuple->get_slot(slot_desc->tuple_offset()); + } StringValue* dst_val = (StringValue*)tuple->get_slot(slot_desc->tuple_offset()); if (column_type.type == TPrimitiveType::CHAR) { dst_val->ptr = (char*)pool->allocate(column_type.len); memcpy(dst_val->ptr, str_val->ptr, str_val->len); @@ -156,10 +156,8 @@ bool OlapRewriteNode::copy_one_row(TupleRow* src_row, Tuple* tuple, MemPool* poo dst_val->ptr = (char*)pool->allocate(column_type.len); memcpy(dst_val->ptr, str_val->ptr, str_val->len); dst_val->len = str_val->len; - } - break; - } - case TPrimitiveType::DECIMALV2: { + } break; + } case TPrimitiveType::DECIMALV2: { DecimalV2Value* dec_val = (DecimalV2Value*)src_value; DecimalV2Value* 
dst_val = (DecimalV2Value*)tuple->get_slot(slot_desc->tuple_offset()); if (dec_val->greater_than_scale(column_type.scale)) { diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 03a6ad920b..08bcc814bc 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -578,7 +578,8 @@ Status OlapScanNode::normalize_conjuncts() { case TYPE_CHAR: case TYPE_VARCHAR: - case TYPE_HLL: { + case TYPE_HLL: + case TYPE_STRING: { ColumnValueRange range(slots[slot_idx]->col_name(), slots[slot_idx]->type().type); normalize_predicate(range, slots[slot_idx]); @@ -952,7 +953,8 @@ Status OlapScanNode::change_fixed_value_range(ColumnValueRange& temp_range, P case TYPE_SMALLINT: case TYPE_INT: case TYPE_BIGINT: - case TYPE_LARGEINT: { + case TYPE_LARGEINT: + case TYPE_STRING: { func(temp_range, reinterpret_cast(value)); break; } @@ -1250,7 +1252,8 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot, case TYPE_INT: case TYPE_BIGINT: case TYPE_LARGEINT: - case TYPE_BOOLEAN: { + case TYPE_BOOLEAN: + case TYPE_STRING: { range->add_range(to_olap_filter_type(pred->op(), child_idx), *reinterpret_cast(value)); break; diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index 64053e9945..073c92b94a 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -443,7 +443,8 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) { } case TYPE_VARCHAR: case TYPE_OBJECT: - case TYPE_HLL: { + case TYPE_HLL: + case TYPE_STRING: { Slice* slice = reinterpret_cast(ptr); StringValue* slot = tuple->get_string_slot(slot_desc->tuple_offset()); slot->ptr = slice->data; diff --git a/be/src/exec/olap_utils.h b/be/src/exec/olap_utils.h index b3929f3b7d..a510a172e0 100644 --- a/be/src/exec/olap_utils.h +++ b/be/src/exec/olap_utils.h @@ -70,6 +70,7 @@ inline CompareLargeFunc get_compare_func(PrimitiveType type) { case TYPE_CHAR: case TYPE_VARCHAR: + case TYPE_STRING: return compare_large; default: 
diff --git a/be/src/exec/parquet_writer.cpp b/be/src/exec/parquet_writer.cpp index 319f65d90a..0b8be8a544 100644 --- a/be/src/exec/parquet_writer.cpp +++ b/be/src/exec/parquet_writer.cpp @@ -356,7 +356,8 @@ Status ParquetWriterWrapper::_write_one_row(TupleRow* row) { break; } case TYPE_CHAR: - case TYPE_VARCHAR: { + case TYPE_VARCHAR: + case TYPE_STRING: { if (_str_schema[index][1] != "byte_array") { std::stringstream ss; ss << "project field type is char/varchar, should use byte_array, but the " diff --git a/be/src/exec/partitioned_hash_table.cc b/be/src/exec/partitioned_hash_table.cc index a3d3fab94c..b6801a44ee 100644 --- a/be/src/exec/partitioned_hash_table.cc +++ b/be/src/exec/partitioned_hash_table.cc @@ -234,7 +234,7 @@ uint32_t PartitionedHashTableCtx::HashVariableLenRow(const uint8_t* expr_values, // non-string and null slots are already part of 'expr_values'. // if (build_expr_ctxs_[i]->root()->type().type != TYPE_STRING PrimitiveType type = build_exprs_[i]->type().type; - if (type != TYPE_CHAR && type != TYPE_VARCHAR) { + if (type != TYPE_CHAR && type != TYPE_VARCHAR && type != TYPE_STRING) { continue; } diff --git a/be/src/exec/schema_scanner/schema_columns_scanner.cpp b/be/src/exec/schema_scanner/schema_columns_scanner.cpp index 365d3b0f45..2b8ce49d8d 100644 --- a/be/src/exec/schema_scanner/schema_columns_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_columns_scanner.cpp @@ -112,6 +112,7 @@ std::string SchemaColumnsScanner::to_mysql_data_type_string(TColumnDesc& desc) { case TPrimitiveType::DOUBLE: return "double"; case TPrimitiveType::VARCHAR: + case TPrimitiveType::STRING: return "varchar"; case TPrimitiveType::CHAR: return "char"; @@ -151,6 +152,8 @@ std::string SchemaColumnsScanner::type_to_string(TColumnDesc& desc) { } else { return "varchar(20)"; } + case TPrimitiveType::STRING: + return "string"; case TPrimitiveType::CHAR: if (desc.__isset.columnLength) { return "char(" + std::to_string(desc.columnLength) + ")"; @@ -261,7 +264,8 @@ Status 
SchemaColumnsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // For string columns, the maximum length in characters. { int data_type = _desc_result.columns[_column_index].columnDesc.columnType; - if (data_type == TPrimitiveType::VARCHAR || data_type == TPrimitiveType::CHAR) { + if (data_type == TPrimitiveType::VARCHAR || data_type == TPrimitiveType::CHAR || + data_type == TPrimitiveType::STRING) { void* slot = tuple->get_slot(_tuple_desc->slots()[8]->tuple_offset()); int64_t* str_slot = reinterpret_cast(slot); if (_desc_result.columns[_column_index].columnDesc.__isset.columnLength) { @@ -277,7 +281,8 @@ Status SchemaColumnsScanner::fill_one_row(Tuple* tuple, MemPool* pool) { // For string columns, the maximum length in bytes. { int data_type = _desc_result.columns[_column_index].columnDesc.columnType; - if (data_type == TPrimitiveType::VARCHAR || data_type == TPrimitiveType::CHAR) { + if (data_type == TPrimitiveType::VARCHAR || data_type == TPrimitiveType::CHAR || + data_type == TPrimitiveType::STRING) { void* slot = tuple->get_slot(_tuple_desc->slots()[9]->tuple_offset()); int64_t* str_slot = reinterpret_cast(slot); if (_desc_result.columns[_column_index].columnDesc.__isset.columnLength) { diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp index 5a7ac0503d..a9488ec7de 100644 --- a/be/src/exec/tablet_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -594,6 +594,7 @@ Status OlapTableSink::prepare(RuntimeState* state) { case TYPE_DATETIME: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: _need_validate_data = true; break; default: @@ -698,7 +699,6 @@ Status OlapTableSink::send(RuntimeState* state, RowBatch* input_batch) { _convert_batch(state, input_batch, _output_batch.get()); batch = _output_batch.get(); } - int num_invalid_rows = 0; if (_need_validate_data) { SCOPED_RAW_TIMER(&_validate_data_ns); @@ -939,6 +939,19 @@ int OlapTableSink::_validate_data(RuntimeState* state, RowBatch* batch, Bitmap* } break; } + case TYPE_STRING: { + 
StringValue* str_val = (StringValue*)slot; + if (str_val->len > desc->type().MAX_STRING_LENGTH) { + ss << "the length of input is too long than schema. " + << "column_name: " << desc->col_name() << "; " + << "first 128 bytes of input_str: [" << std::string(str_val->ptr, 128) << "] " + << "schema length: " << desc->type().MAX_STRING_LENGTH << "; " + << "actual length: " << str_val->len << "; "; + row_valid = false; + continue; + } + break; + } case TYPE_DECIMALV2: { DecimalV2Value dec_val(reinterpret_cast(slot)->value); if (dec_val.greater_than_scale(desc->type().scale)) { diff --git a/be/src/exec/text_converter.hpp b/be/src/exec/text_converter.hpp index 6b66755460..1b1a60e64b 100644 --- a/be/src/exec/text_converter.hpp +++ b/be/src/exec/text_converter.hpp @@ -57,7 +57,8 @@ inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, Tuple* tu switch (slot_desc->type().type) { case TYPE_HLL: case TYPE_VARCHAR: - case TYPE_CHAR: { + case TYPE_CHAR: + case TYPE_STRING: { StringValue* str_slot = reinterpret_cast(slot); str_slot->ptr = const_cast(data); str_slot->len = len; diff --git a/be/src/exprs/agg_fn_evaluator.cpp b/be/src/exprs/agg_fn_evaluator.cpp index 5e11f20948..f8a3d60db1 100644 --- a/be/src/exprs/agg_fn_evaluator.cpp +++ b/be/src/exprs/agg_fn_evaluator.cpp @@ -313,6 +313,7 @@ inline void AggFnEvaluator::set_any_val(const void* slot, const TypeDescriptor& case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: reinterpret_cast(slot)->to_string_val( reinterpret_cast(dst)); return; @@ -383,6 +384,7 @@ inline void AggFnEvaluator::set_output_slot(const AnyVal* src, const SlotDescrip case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: *reinterpret_cast(slot) = StringValue::from_string_val(*reinterpret_cast(src)); return; @@ -563,7 +565,8 @@ bool AggFnEvaluator::count_distinct_data_filter(TupleRow* row, Tuple* dst) { case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: - case TYPE_OBJECT: { + case TYPE_OBJECT: + case 
TYPE_STRING: { StringVal* value = reinterpret_cast(_staging_input_vals[i]); memcpy(begin, value->ptr, value->len); begin += value->len; @@ -890,7 +893,8 @@ void AggFnEvaluator::serialize_or_finalize(FunctionContext* agg_fn_ctx, Tuple* s case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: - case TYPE_OBJECT: { + case TYPE_OBJECT: + case TYPE_STRING: { typedef StringVal (*Fn)(FunctionContext*, AnyVal*); StringVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); set_output_slot(&v, dst_slot_desc, dst); diff --git a/be/src/exprs/anyval_util.cpp b/be/src/exprs/anyval_util.cpp index c61a9aa55e..18cecad096 100644 --- a/be/src/exprs/anyval_util.cpp +++ b/be/src/exprs/anyval_util.cpp @@ -80,6 +80,7 @@ AnyVal* create_any_val(ObjectPool* pool, const TypeDescriptor& type) { case TYPE_HLL: case TYPE_VARCHAR: case TYPE_OBJECT: + case TYPE_STRING: return pool->add(new StringVal); case TYPE_DECIMALV2: @@ -162,6 +163,10 @@ FunctionContext::TypeDesc AnyValUtil::column_type_to_type_desc(const TypeDescrip out.children.push_back(column_type_to_type_desc(t)); } break; + case TYPE_STRING: + out.type = FunctionContext::TYPE_STRING; + out.len = type.len; + break; default: DCHECK(false) << "Unknown type: " << type; } diff --git a/be/src/exprs/anyval_util.h b/be/src/exprs/anyval_util.h index d465c12866..3eb6cf641d 100644 --- a/be/src/exprs/anyval_util.h +++ b/be/src/exprs/anyval_util.h @@ -239,6 +239,7 @@ public: case TYPE_HLL: case TYPE_CHAR: case TYPE_VARCHAR: + case TYPE_STRING: return sizeof(doris_udf::StringVal); case TYPE_DATE: @@ -280,6 +281,7 @@ public: case TYPE_HLL: case TYPE_VARCHAR: case TYPE_CHAR: + case TYPE_STRING: return alignof(StringVal); case TYPE_DATETIME: case TYPE_DATE: @@ -376,6 +378,7 @@ public: case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: reinterpret_cast(slot)->to_string_val( reinterpret_cast(dst)); return; diff --git a/be/src/exprs/binary_predicate.cpp b/be/src/exprs/binary_predicate.cpp index 7d67b160b2..6bc7cd65fe 100644 
--- a/be/src/exprs/binary_predicate.cpp +++ b/be/src/exprs/binary_predicate.cpp @@ -50,6 +50,7 @@ Expr* BinaryPredicate::from_thrift(const TExprNode& node) { return new EqDoubleValPred(node); case TPrimitiveType::CHAR: case TPrimitiveType::VARCHAR: + case TPrimitiveType::STRING: return new EqStringValPred(node); case TPrimitiveType::DATE: case TPrimitiveType::DATETIME: @@ -80,6 +81,7 @@ Expr* BinaryPredicate::from_thrift(const TExprNode& node) { return new NeDoubleValPred(node); case TPrimitiveType::CHAR: case TPrimitiveType::VARCHAR: + case TPrimitiveType::STRING: return new NeStringValPred(node); case TPrimitiveType::DATE: case TPrimitiveType::DATETIME: @@ -110,6 +112,7 @@ Expr* BinaryPredicate::from_thrift(const TExprNode& node) { return new LtDoubleValPred(node); case TPrimitiveType::CHAR: case TPrimitiveType::VARCHAR: + case TPrimitiveType::STRING: return new LtStringValPred(node); case TPrimitiveType::DATE: case TPrimitiveType::DATETIME: @@ -140,6 +143,7 @@ Expr* BinaryPredicate::from_thrift(const TExprNode& node) { return new LeDoubleValPred(node); case TPrimitiveType::CHAR: case TPrimitiveType::VARCHAR: + case TPrimitiveType::STRING: return new LeStringValPred(node); case TPrimitiveType::DATE: case TPrimitiveType::DATETIME: @@ -170,6 +174,7 @@ Expr* BinaryPredicate::from_thrift(const TExprNode& node) { return new GtDoubleValPred(node); case TPrimitiveType::CHAR: case TPrimitiveType::VARCHAR: + case TPrimitiveType::STRING: return new GtStringValPred(node); case TPrimitiveType::DATE: case TPrimitiveType::DATETIME: @@ -200,6 +205,7 @@ Expr* BinaryPredicate::from_thrift(const TExprNode& node) { return new GeDoubleValPred(node); case TPrimitiveType::CHAR: case TPrimitiveType::VARCHAR: + case TPrimitiveType::STRING: return new GeStringValPred(node); case TPrimitiveType::DATE: case TPrimitiveType::DATETIME: @@ -230,6 +236,7 @@ Expr* BinaryPredicate::from_thrift(const TExprNode& node) { return new EqForNullDoubleValPred(node); case TPrimitiveType::CHAR: case 
TPrimitiveType::VARCHAR: + case TPrimitiveType::STRING: return new EqForNullStringValPred(node); case TPrimitiveType::DATE: case TPrimitiveType::DATETIME: diff --git a/be/src/exprs/bloomfilter_predicate.cpp b/be/src/exprs/bloomfilter_predicate.cpp index cd2004631e..c3b199d0a6 100644 --- a/be/src/exprs/bloomfilter_predicate.cpp +++ b/be/src/exprs/bloomfilter_predicate.cpp @@ -56,6 +56,8 @@ IBloomFilterFuncBase* IBloomFilterFuncBase::create_bloom_filter(MemTracker* trac return new BloomFilterFunc(tracker); case TYPE_VARCHAR: return new BloomFilterFunc(tracker); + case TYPE_STRING: + return new BloomFilterFunc(tracker); default: return nullptr; } diff --git a/be/src/exprs/case_expr.cpp b/be/src/exprs/case_expr.cpp index 5602d067e0..79fa34c67a 100644 --- a/be/src/exprs/case_expr.cpp +++ b/be/src/exprs/case_expr.cpp @@ -110,6 +110,7 @@ void CaseExpr::get_child_val(int child_idx, ExprContext* ctx, TupleRow* row, Any case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: *reinterpret_cast(dst) = _children[child_idx]->get_string_val(ctx, row); break; case TYPE_DECIMALV2: @@ -154,6 +155,7 @@ bool CaseExpr::any_val_eq(const TypeDescriptor& type, const AnyVal* v1, const An case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: return AnyValUtil::equals(type, *reinterpret_cast(v1), *reinterpret_cast(v2)); case TYPE_DECIMALV2: diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp index e4f88eb450..544af354e9 100644 --- a/be/src/exprs/expr.cpp +++ b/be/src/exprs/expr.cpp @@ -140,6 +140,7 @@ Expr::Expr(const TypeDescriptor& type) case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: _node_type = (TExprNodeType::STRING_LITERAL); break; @@ -197,6 +198,7 @@ Expr::Expr(const TypeDescriptor& type, bool is_slotref) case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: _node_type = (TExprNodeType::STRING_LITERAL); break; @@ -457,7 +459,8 @@ int Expr::compute_results_layout(const std::vector& exprs, 
std::vectortype().type == TYPE_CHAR || exprs[i]->type().type == TYPE_VARCHAR) { + if (exprs[i]->type().type == TYPE_CHAR || exprs[i]->type().type == TYPE_VARCHAR + || exprs[i]->type().type == TYPE_STRING) { data[i].byte_size = 16; data[i].variable_length = true; } else { @@ -704,7 +707,8 @@ doris_udf::AnyVal* Expr::get_const_val(ExprContext* context) { case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: - case TYPE_OBJECT: { + case TYPE_OBJECT: + case TYPE_STRING: { _constant_val.reset(new StringVal(get_string_val(context, NULL))); break; } diff --git a/be/src/exprs/expr_context.cpp b/be/src/exprs/expr_context.cpp index d8cd5a3a5d..fcc67c1b5d 100644 --- a/be/src/exprs/expr_context.cpp +++ b/be/src/exprs/expr_context.cpp @@ -245,7 +245,8 @@ void* ExprContext::get_value(Expr* e, TupleRow* row) { case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: - case TYPE_OBJECT: { + case TYPE_OBJECT: + case TYPE_STRING: { doris_udf::StringVal v = e->get_string_val(this, row); if (v.is_null) { return nullptr; diff --git a/be/src/exprs/hybrid_set.cpp b/be/src/exprs/hybrid_set.cpp index 07ebbc8ca4..4ebdabcaf0 100644 --- a/be/src/exprs/hybrid_set.cpp +++ b/be/src/exprs/hybrid_set.cpp @@ -54,6 +54,7 @@ HybridSetBase* HybridSetBase::create_set(PrimitiveType type) { case TYPE_CHAR: case TYPE_VARCHAR: + case TYPE_STRING: return new (std::nothrow) StringValueSet(); default: diff --git a/be/src/exprs/literal.cpp b/be/src/exprs/literal.cpp index 45dfcb0b2a..da251eb86e 100644 --- a/be/src/exprs/literal.cpp +++ b/be/src/exprs/literal.cpp @@ -82,6 +82,7 @@ Literal::Literal(const TExprNode& node) : Expr(node) { break; case TYPE_CHAR: case TYPE_VARCHAR: + case TYPE_STRING: DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL); DCHECK(node.__isset.string_literal); _value.set_string_val(node.string_literal.value); diff --git a/be/src/exprs/new_agg_fn_evaluator.cc b/be/src/exprs/new_agg_fn_evaluator.cc index 0d192d0d85..b7f92d54aa 100644 --- a/be/src/exprs/new_agg_fn_evaluator.cc +++ 
b/be/src/exprs/new_agg_fn_evaluator.cc @@ -267,6 +267,7 @@ void NewAggFnEvaluator::SetDstSlot(const AnyVal* src, const SlotDescriptor& dst_ case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: *reinterpret_cast(slot) = StringValue::from_string_val(*reinterpret_cast(src)); return; @@ -363,6 +364,7 @@ inline void NewAggFnEvaluator::set_any_val(const void* slot, const TypeDescripto case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: reinterpret_cast(slot)->to_string_val( reinterpret_cast(dst)); return; @@ -603,7 +605,8 @@ void NewAggFnEvaluator::SerializeOrFinalize(Tuple* src, const SlotDescriptor& ds case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: - case TYPE_OBJECT: { + case TYPE_OBJECT: + case TYPE_STRING: { typedef StringVal (*Fn)(FunctionContext*, AnyVal*); StringVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); SetDstSlot(&v, dst_slot_desc, dst); diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index 852601f4e6..574a837495 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -169,6 +169,7 @@ MinMaxFuncBase* MinMaxFuncBase::create_minmax_filter(PrimitiveType type) { case TYPE_CHAR: case TYPE_VARCHAR: + case TYPE_STRING: return new (std::nothrow) MinMaxNumFunc(); default: DCHECK(false) << "Invalid type."; @@ -212,6 +213,7 @@ TExprNodeType::type get_expr_node_type(PrimitiveType type) { case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: return TExprNodeType::STRING_LITERAL; default: @@ -250,6 +252,8 @@ PColumnType to_proto(PrimitiveType type) { return PColumnType::COLUMN_TYPE_CHAR; case TYPE_VARCHAR: return PColumnType::COLUMN_TYPE_VARCHAR; + case TYPE_STRING: + return PColumnType::COLUMN_TYPE_STRING; default: DCHECK(false) << "Invalid type."; } @@ -400,7 +404,8 @@ Expr* create_literal(ObjectPool* pool, PrimitiveType type, const void* data) { break; } case TYPE_CHAR: - case TYPE_VARCHAR: { + case TYPE_VARCHAR: + case 
TYPE_STRING: { const StringValue* string_value = reinterpret_cast(data); TStringLiteral tstringLiteral; tstringLiteral.__set_value(std::string(string_value->ptr, string_value->len)); @@ -673,7 +678,8 @@ public: return _minmax_func->assign(&min_val, &max_val); } case TYPE_VARCHAR: - case TYPE_CHAR: { + case TYPE_CHAR: + case TYPE_STRING: { auto& min_val_ref = minmax_filter->min_val().stringval(); auto& max_val_ref = minmax_filter->max_val().stringval(); auto min_val_ptr = _pool->add(new std::string(min_val_ref)); @@ -705,7 +711,8 @@ public: if (_filter_type == RuntimeFilterType::MINMAX_FILTER) { switch (_column_return_type) { case TYPE_VARCHAR: - case TYPE_CHAR: { + case TYPE_CHAR: + case TYPE_STRING: { StringValue* min_value = static_cast(_minmax_func->get_min()); StringValue* max_value = static_cast(_minmax_func->get_max()); auto min_val_ptr = _pool->add(new std::string(min_value->ptr)); @@ -1011,7 +1018,8 @@ void IRuntimeFilter::to_protobuf(PMinMaxFilter* filter) { return; } case TYPE_CHAR: - case TYPE_VARCHAR: { + case TYPE_VARCHAR: + case TYPE_STRING: { const StringValue* min_string_value = reinterpret_cast(min_data); filter->mutable_min_val()->set_stringval( std::string(min_string_value->ptr, min_string_value->len)); diff --git a/be/src/olap/aggregate_func.cpp b/be/src/olap/aggregate_func.cpp index 35482d8dca..f3c6d8bcbe 100644 --- a/be/src/olap/aggregate_func.cpp +++ b/be/src/olap/aggregate_func.cpp @@ -102,6 +102,7 @@ AggregateFuncResolver::AggregateFuncResolver() { add_aggregate_mapping(); add_aggregate_mapping(); add_aggregate_mapping(); + add_aggregate_mapping(); add_aggregate_mapping(); // array types has sub type like array field type is array, subtype is int add_aggregate_mapping(); add_aggregate_mapping(); add_aggregate_mapping(); + add_aggregate_mapping(); // Max Aggregate Function add_aggregate_mapping(); @@ -146,6 +148,7 @@ AggregateFuncResolver::AggregateFuncResolver() { add_aggregate_mapping(); add_aggregate_mapping(); add_aggregate_mapping(); + 
add_aggregate_mapping(); // Sum Aggregate Function add_aggregate_mapping(); @@ -171,6 +174,7 @@ AggregateFuncResolver::AggregateFuncResolver() { add_aggregate_mapping(); add_aggregate_mapping(); add_aggregate_mapping(); + add_aggregate_mapping(); // ReplaceIfNotNull Aggregate Function add_aggregate_mapping(); @@ -186,6 +190,7 @@ AggregateFuncResolver::AggregateFuncResolver() { add_aggregate_mapping(); add_aggregate_mapping(); add_aggregate_mapping(); + add_aggregate_mapping(); // Hyperloglog Aggregate Function add_aggregate_mapping(); diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h index 39e043ceb8..9233d1d8f2 100644 --- a/be/src/olap/aggregate_func.h +++ b/be/src/olap/aggregate_func.h @@ -239,6 +239,10 @@ struct AggregateFuncTraits } }; +template <> +struct AggregateFuncTraits + : public AggregateFuncTraits {}; + template <> struct AggregateFuncTraits : public AggregateFuncTraits {}; @@ -314,6 +318,10 @@ template <> struct AggregateFuncTraits : public AggregateFuncTraits {}; +template <> +struct AggregateFuncTraits + : public AggregateFuncTraits {}; + template struct AggregateFuncTraits : public AggregateFuncTraits { @@ -403,6 +411,11 @@ template <> struct AggregateFuncTraits : public AggregateFuncTraits {}; + +template <> +struct AggregateFuncTraits + : public AggregateFuncTraits {}; + // REPLACE_IF_NOT_NULL template @@ -451,6 +464,12 @@ template <> struct AggregateFuncTraits : public AggregateFuncTraits {}; + +template <> +struct AggregateFuncTraits + : public AggregateFuncTraits {}; + // when data load, after hll_hash function, hll_union column won't be null // so when init, update hll, the src is not null template <> @@ -553,6 +572,11 @@ struct AggregateFuncTraits { }; +template <> +struct AggregateFuncTraits + : public AggregateFuncTraits { +}; + template struct AggregateTraits : public AggregateFuncTraits { diff --git a/be/src/olap/column_vector.cpp b/be/src/olap/column_vector.cpp index 84f4f61267..7e3cf3065b 100644 --- 
a/be/src/olap/column_vector.cpp +++ b/be/src/olap/column_vector.cpp @@ -105,6 +105,10 @@ Status ColumnVectorBatch::create(size_t init_capacity, bool is_nullable, const T new ScalarColumnVectorBatch::CppType>( type_info, is_nullable)); break; + case OLAP_FIELD_TYPE_STRING: + local.reset(new ScalarColumnVectorBatch::CppType>( + type_info, is_nullable)); + break; case OLAP_FIELD_TYPE_HLL: local.reset(new ScalarColumnVectorBatch::CppType>( type_info, is_nullable)); diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp index 2f169c916c..2996979556 100644 --- a/be/src/olap/delete_handler.cpp +++ b/be/src/olap/delete_handler.cpp @@ -141,6 +141,7 @@ bool DeleteConditionHandler::is_condition_value_valid(const TabletColumn& column return valid_decimal(value_str, column.precision(), column.frac()); case OLAP_FIELD_TYPE_CHAR: case OLAP_FIELD_TYPE_VARCHAR: + case OLAP_FIELD_TYPE_STRING: return value_str.size() <= column.length(); case OLAP_FIELD_TYPE_DATE: case OLAP_FIELD_TYPE_DATETIME: diff --git a/be/src/olap/field.h b/be/src/olap/field.h index e290b50fd2..b7e04bbd27 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -44,11 +44,11 @@ public: explicit Field() = default; explicit Field(const TabletColumn& column) : _type_info(get_type_info(&column)), + _length(column.length()), _key_coder(get_key_coder(column.type())), _name(column.name()), _index_size(column.index_length()), - _is_nullable(column.is_nullable()), - _length(column.length()) { + _is_nullable(column.is_nullable()) { if (column.type() == OLAP_FIELD_TYPE_ARRAY) { _agg_info = get_aggregate_info(column.aggregation(), column.type(), column.get_sub_column(0).type()); @@ -66,14 +66,12 @@ public: inline const std::string& name() const { return _name; } virtual inline void set_to_max(char* buf) const { return _type_info->set_to_max(buf); } - virtual inline void set_to_zone_map_max(char* buf) const { - set_to_max(buf); - } + virtual inline void set_to_zone_map_max(char* buf) const { 
set_to_max(buf); } - inline void set_to_min(char* buf) const { return _type_info->set_to_min(buf); } - inline void set_to_zone_map_min(char* buf) const { - set_to_min(buf); - } + virtual inline void set_to_min(char* buf) const { return _type_info->set_to_min(buf); } + virtual inline void set_to_zone_map_min(char* buf) const { set_to_min(buf); } + + void set_long_text_buf(char** buf) { _long_text_buf = buf; } // This function allocate memory from pool, other than allocate_memory // reserve memory from continuous memory. @@ -174,6 +172,15 @@ public: // memory allocation. template void direct_copy(DstCellType* dst, const SrcCellType& src) const { + if (type() == OLAP_FIELD_TYPE_STRING) { + auto dst_slice = reinterpret_cast(dst->mutable_cell_ptr()); + auto src_slice = reinterpret_cast(src.cell_ptr()); + if (dst_slice->size < src_slice->size) { + *_long_text_buf = static_cast(realloc(*_long_text_buf, src_slice->size)); + dst_slice->data = *_long_text_buf; + dst_slice->size = src_slice->size; + } + } bool is_null = src.is_null(); dst->set_is_null(is_null); if (is_null) { @@ -232,6 +239,14 @@ public: // used by init scan key stored in string format // value_string should end with '\0' inline OLAPStatus from_string(char* buf, const std::string& value_string) const { + if (type() == OLAP_FIELD_TYPE_STRING) { + auto dst_slice = reinterpret_cast(buf); + if (dst_slice->size < value_string.size()) { + *_long_text_buf = static_cast(realloc(*_long_text_buf, value_string.size())); + dst_slice->data = *_long_text_buf; + dst_slice->size = value_string.size(); + } + } return _type_info->from_string(buf, value_string); } @@ -279,20 +294,16 @@ public: protected: const TypeInfo* _type_info; - -private: - // Field的最大长度,单位为字节,通常等于length, 变长字符串不同 - const KeyCoder* _key_coder; - std::string _name; - uint16_t _index_size; - bool _is_nullable; - std::vector> _sub_fields; - -protected: const AggregateInfo* _agg_info; // 长度,单位为字节 // 除字符串外,其它类型都是确定的 uint32_t _length; + // Since the length of the 
STRING type cannot be determined, + // only dynamic memory can be used. Mempool cannot realize realloc. + // The schema information is shared globally. Therefore, + // dynamic memory can only be managed in thread local mode. + // The memory will be created and released in rowcursor. + char** _long_text_buf = nullptr; char* allocate_string_value(MemPool* pool) const { char* type_value = (char*)pool->allocate(sizeof(Slice)); @@ -314,6 +325,14 @@ protected: other->add_sub_field(std::unique_ptr(item)); } } + +private: + // Field的最大长度,单位为字节,通常等于length, 变长字符串不同 + const KeyCoder* _key_coder; + std::string _name; + uint16_t _index_size; + bool _is_nullable; + std::vector> _sub_fields; }; template @@ -327,15 +346,15 @@ int Field::index_cmp(const LhsCellType& lhs, const RhsCellType& rhs) const { } int32_t res = 0; - if (type() == OLAP_FIELD_TYPE_VARCHAR) { + if (type() == OLAP_FIELD_TYPE_VARCHAR || type() == OLAP_FIELD_TYPE_STRING) { const Slice* l_slice = reinterpret_cast(lhs.cell_ptr()); const Slice* r_slice = reinterpret_cast(rhs.cell_ptr()); - - if (r_slice->size + OLAP_STRING_MAX_BYTES > _index_size || - l_slice->size + OLAP_STRING_MAX_BYTES > _index_size) { + uint32_t max_bytes = + type() == OLAP_FIELD_TYPE_VARCHAR ? OLAP_VARCHAR_MAX_BYTES : OLAP_STRING_MAX_BYTES; + if (r_slice->size + max_bytes > _index_size || l_slice->size + max_bytes > _index_size) { // 如果field的实际长度比short key长,则仅比较前缀,确保相同short key的所有block都被扫描, // 否则,可以直接比较short key和field - int compare_size = _index_size - OLAP_STRING_MAX_BYTES; + int compare_size = _index_size - max_bytes; // l_slice size and r_slice size may be less than compare_size // so calculate the min of the three size as new compare_size compare_size = std::min(std::min(compare_size, (int)l_slice->size), (int)r_slice->size); @@ -344,7 +363,7 @@ int Field::index_cmp(const LhsCellType& lhs, const RhsCellType& rhs) const { // Only the fixed length of prefix index should be compared. 
// If r_slice->size > l_slice->size, ignore the extra parts directly. res = strncmp(l_slice->data, r_slice->data, compare_size); - if (res == 0 && compare_size != (_index_size - OLAP_STRING_MAX_BYTES)) { + if (res == 0 && compare_size != (_index_size - max_bytes)) { if (l_slice->size < r_slice->size) { res = -1; } else if (l_slice->size > r_slice->size) { @@ -372,6 +391,16 @@ void Field::to_index(DstCellType* dst, const SrcCellType& src) const { } if (type() == OLAP_FIELD_TYPE_VARCHAR) { + // 先清零,再拷贝 + memset(dst->mutable_cell_ptr(), 0, _index_size); + const Slice* slice = reinterpret_cast(src.cell_ptr()); + size_t copy_size = slice->size < _index_size - OLAP_VARCHAR_MAX_BYTES + ? slice->size + : _index_size - OLAP_VARCHAR_MAX_BYTES; + *reinterpret_cast(dst->mutable_cell_ptr()) = copy_size; + memory_copy((char*)dst->mutable_cell_ptr() + OLAP_VARCHAR_MAX_BYTES, slice->data, + copy_size); + } else if (type() == OLAP_FIELD_TYPE_STRING) { // 先清零,再拷贝 memset(dst->mutable_cell_ptr(), 0, _index_size); const Slice* slice = reinterpret_cast(src.cell_ptr()); @@ -473,11 +502,10 @@ public: // To prevent zone map cost too many memory, if varchar length // longer than `MAX_ZONE_MAP_INDEX_SIZE`. we just allocate // `MAX_ZONE_MAP_INDEX_SIZE` of memory - char* allocate_zone_map_value(MemPool *pool) const override { + char* allocate_zone_map_value(MemPool* pool) const override { char* type_value = (char*)pool->allocate(sizeof(Slice)); auto slice = reinterpret_cast(type_value); - slice->size = MAX_ZONE_MAP_INDEX_SIZE > _length ? _length : - MAX_ZONE_MAP_INDEX_SIZE; + slice->size = MAX_ZONE_MAP_INDEX_SIZE > _length ? _length : MAX_ZONE_MAP_INDEX_SIZE; slice->data = (char*)pool->allocate(slice->size); return type_value; } @@ -493,10 +521,9 @@ public: } } - void set_to_zone_map_max(char* ch) const override { + void set_to_zone_map_max(char* ch) const override { auto slice = reinterpret_cast(ch); - int length = _length < MAX_ZONE_MAP_INDEX_SIZE ? 
_length : - MAX_ZONE_MAP_INDEX_SIZE; + int length = _length < MAX_ZONE_MAP_INDEX_SIZE ? _length : MAX_ZONE_MAP_INDEX_SIZE; slice->size = length; memset(slice->data, 0xFF, slice->size); } @@ -507,13 +534,13 @@ public: explicit VarcharField() : Field() {} explicit VarcharField(const TabletColumn& column) : Field(column) {} - size_t get_variable_len() const override { return _length - OLAP_STRING_MAX_BYTES; } + size_t get_variable_len() const override { return _length - OLAP_VARCHAR_MAX_BYTES; } - // minus OLAP_STRING_MAX_BYTES here just for being compatible with old storage format + // minus OLAP_VARCHAR_MAX_BYTES here just for being compatible with old storage format char* allocate_memory(char* cell_ptr, char* variable_ptr) const override { auto slice = (Slice*)cell_ptr; slice->data = variable_ptr; - slice->size = _length - OLAP_STRING_MAX_BYTES; + slice->size = _length - OLAP_VARCHAR_MAX_BYTES; variable_ptr += slice->size; return variable_ptr; } @@ -531,11 +558,10 @@ public: // To prevent zone map cost too many memory, if varchar length // longer than `MAX_ZONE_MAP_INDEX_SIZE`. we just allocate // `MAX_ZONE_MAP_INDEX_SIZE` of memory - char* allocate_zone_map_value(MemPool *pool) const override { + char* allocate_zone_map_value(MemPool* pool) const override { char* type_value = (char*)pool->allocate(sizeof(Slice)); auto slice = reinterpret_cast(type_value); - slice->size = MAX_ZONE_MAP_INDEX_SIZE > _length ? _length : - MAX_ZONE_MAP_INDEX_SIZE; + slice->size = MAX_ZONE_MAP_INDEX_SIZE > _length ? 
_length : MAX_ZONE_MAP_INDEX_SIZE; slice->data = (char*)pool->allocate(slice->size); return type_value; } @@ -553,18 +579,57 @@ public: void set_to_max(char* ch) const override { auto slice = reinterpret_cast(ch); - slice->size = _length - OLAP_STRING_MAX_BYTES; + slice->size = _length - OLAP_VARCHAR_MAX_BYTES; + memset(slice->data, 0xFF, slice->size); + } + void set_to_zone_map_max(char* ch) const override { + auto slice = reinterpret_cast(ch); + int length = _length < MAX_ZONE_MAP_INDEX_SIZE ? _length : MAX_ZONE_MAP_INDEX_SIZE; + + slice->size = length - OLAP_VARCHAR_MAX_BYTES; + memset(slice->data, 0xFF, slice->size); + } +}; +class StringField : public Field { +public: + explicit StringField() : Field() {} + explicit StringField(const TabletColumn& column) : Field(column) {} + + // minus OLAP_VARCHAR_MAX_BYTES here just for being compatible with old storage format + char* allocate_memory(char* cell_ptr, char* variable_ptr) const override { + return variable_ptr; + } + + StringField* clone() const override { + auto* local = new StringField(); + Field::clone(local); + return local; + } + + char* allocate_value(MemPool* pool) const override { + return Field::allocate_string_value(pool); + } + + char* allocate_zone_map_value(MemPool* pool) const override { + char* type_value = (char*)pool->allocate(sizeof(Slice)); + auto slice = reinterpret_cast(type_value); + slice->size = MAX_ZONE_MAP_INDEX_SIZE; + slice->data = (char*)pool->allocate(slice->size); + return type_value; + } + void set_to_max(char* ch) const override { + auto slice = reinterpret_cast(ch); memset(slice->data, 0xFF, slice->size); } void set_to_zone_map_max(char* ch) const override { auto slice = reinterpret_cast(ch); - int length = _length < MAX_ZONE_MAP_INDEX_SIZE ? 
_length : - MAX_ZONE_MAP_INDEX_SIZE; - - slice->size = length - OLAP_STRING_MAX_BYTES; memset(slice->data, 0xFF, slice->size); } + void set_to_zone_map_min(char* ch) const override { + auto slice = reinterpret_cast(ch); + memset(slice->data, 0x00, slice->size); + } }; class BitmapAggField : public Field { @@ -625,6 +690,8 @@ public: return new CharField(column); case OLAP_FIELD_TYPE_VARCHAR: return new VarcharField(column); + case OLAP_FIELD_TYPE_STRING: + return new StringField(column); case OLAP_FIELD_TYPE_ARRAY: { std::unique_ptr item_field(FieldFactory::create(column.get_sub_column(0))); auto* local = new ArrayField(column); @@ -649,6 +716,8 @@ public: return new CharField(column); case OLAP_FIELD_TYPE_VARCHAR: return new VarcharField(column); + case OLAP_FIELD_TYPE_STRING: + return new StringField(column); case OLAP_FIELD_TYPE_ARRAY: { std::unique_ptr item_field(FieldFactory::create(column.get_sub_column(0))); auto* local = new ArrayField(column); diff --git a/be/src/olap/key_coder.cpp b/be/src/olap/key_coder.cpp index 1a18662d8a..3e2ab5ba55 100644 --- a/be/src/olap/key_coder.cpp +++ b/be/src/olap/key_coder.cpp @@ -71,6 +71,7 @@ private: add_mapping(); add_mapping(); add_mapping(); + add_mapping(); add_mapping(); } diff --git a/be/src/olap/key_coder.h b/be/src/olap/key_coder.h index 520b2a061c..b83bded315 100644 --- a/be/src/olap/key_coder.h +++ b/be/src/olap/key_coder.h @@ -256,4 +256,33 @@ public: } }; +template <> +class KeyCoderTraits { +public: + static void full_encode_ascending(const void* value, std::string* buf) { + auto slice = reinterpret_cast(value); + buf->append(slice->get_data(), slice->get_size()); + } + + static void encode_ascending(const void* value, size_t index_size, std::string* buf) { + const Slice* slice = (const Slice*)value; + size_t copy_size = std::min(index_size, slice->size); + buf->append(slice->data, copy_size); + } + + static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr, + MemPool* pool) { + 
CHECK(encoded_key->size <= index_size) + << "encoded_key size is larger than index_size, key_size=" << encoded_key->size + << ", index_size=" << index_size; + auto copy_size = encoded_key->size; + Slice* slice = (Slice*)cell_ptr; + slice->data = (char*)pool->allocate(copy_size); + slice->size = copy_size; + memcpy(slice->data, encoded_key->data, copy_size); + encoded_key->remove_prefix(copy_size); + return Status::OK(); + } +}; + } // namespace doris diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index 9e0ebe9f83..076987066e 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -54,16 +54,16 @@ enum CompactionType { BASE_COMPACTION = 1, CUMULATIVE_COMPACTION = 2 }; struct DataDirInfo { std::string path; size_t path_hash = 0; - int64_t disk_capacity = 1; // actual disk capacity - int64_t available = 0; // 可用空间,单位字节 + int64_t disk_capacity = 1; // actual disk capacity + int64_t available = 0; // 可用空间,单位字节 int64_t data_used_capacity = 0; - bool is_used = false; // 是否可用标识 + bool is_used = false; // 是否可用标识 TStorageMedium::type storage_medium = TStorageMedium::HDD; // 存储介质类型:SSD|HDD }; // Sort DataDirInfo by available space. 
struct DataDirInfoLessAvailability { - bool operator() (const DataDirInfo& left, const DataDirInfo& right) const { + bool operator()(const DataDirInfo& left, const DataDirInfo& right) const { return left.available < right.available; } }; @@ -94,11 +94,8 @@ struct TabletInfo { }; struct TabletSize { - TabletSize(TTabletId in_tablet_id, TSchemaHash in_schema_hash, size_t in_tablet_size) : - tablet_id(in_tablet_id), - schema_hash(in_schema_hash), - tablet_size(in_tablet_size) {} - + TabletSize(TTabletId in_tablet_id, TSchemaHash in_schema_hash, size_t in_tablet_size) + : tablet_id(in_tablet_id), schema_hash(in_schema_hash), tablet_size(in_tablet_size) {} TTabletId tablet_id; TSchemaHash schema_hash; @@ -141,11 +138,12 @@ enum FieldType { OLAP_FIELD_TYPE_STRUCT = 18, // Struct OLAP_FIELD_TYPE_ARRAY = 19, // ARRAY OLAP_FIELD_TYPE_MAP = 20, // Map - OLAP_FIELD_TYPE_UNKNOWN = 21, // UNKNOW Type + OLAP_FIELD_TYPE_UNKNOWN = 21, // UNKNOW OLAP_FIELD_TYPE_STRING OLAP_FIELD_TYPE_NONE = 22, OLAP_FIELD_TYPE_HLL = 23, OLAP_FIELD_TYPE_BOOL = 24, - OLAP_FIELD_TYPE_OBJECT = 25 + OLAP_FIELD_TYPE_OBJECT = 25, + OLAP_FIELD_TYPE_STRING = 26 }; // 定义Field支持的所有聚集方法 diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h index 75b10879d9..f140f6b4ab 100644 --- a/be/src/olap/olap_define.h +++ b/be/src/olap/olap_define.h @@ -51,14 +51,20 @@ static const uint64_t OLAP_FIX_HEADER_MAGIC_NUMBER = 0; static constexpr uint32_t OLAP_COMPACTION_DEFAULT_CANDIDATE_SIZE = 10; // the max length supported for varchar type -static const uint16_t OLAP_STRING_MAX_LENGTH = 65535; +static const uint16_t OLAP_VARCHAR_MAX_LENGTH = 65535; + +// the max length supported for string type 2GB +static const uint32_t OLAP_STRING_MAX_LENGTH = 2147483647; + // the max length supported for array static const uint16_t OLAP_ARRAY_MAX_LENGTH = 65535; // the max bytes for stored string length using StringOffsetType = uint32_t; -using StringLengthType = uint16_t; +using StringLengthType = uint32_t; +using 
VarcharLengthType = uint16_t; static const uint16_t OLAP_STRING_MAX_BYTES = sizeof(StringLengthType); +static const uint16_t OLAP_VARCHAR_MAX_BYTES = sizeof(VarcharLengthType); // the max bytes for stored array length static const uint16_t OLAP_ARRAY_MAX_BYTES = OLAP_ARRAY_MAX_LENGTH; diff --git a/be/src/olap/olap_index.cpp b/be/src/olap/olap_index.cpp index e4adb74526..de71f518bd 100644 --- a/be/src/olap/olap_index.cpp +++ b/be/src/olap/olap_index.cpp @@ -205,6 +205,36 @@ OLAPStatus MemIndex::load_segment(const char* file, size_t* current_num_rows_per // 1. copy null_byte memory_copy(mem_ptr, storage_ptr, null_byte); + // 2. copy length and content + bool is_null = *reinterpret_cast(mem_ptr); + if (!is_null) { + size_t storage_field_bytes = + *reinterpret_cast(storage_ptr + null_byte); + Slice* slice = reinterpret_cast(mem_ptr + 1); + char* data = reinterpret_cast(_mem_pool->allocate(storage_field_bytes)); + memory_copy(data, storage_ptr + sizeof(VarcharLengthType) + null_byte, + storage_field_bytes); + slice->data = data; + slice->size = storage_field_bytes; + } + + mem_ptr += mem_row_bytes; + storage_ptr += storage_row_bytes; + } + } else if (column.type() == OLAP_FIELD_TYPE_STRING) { + mem_field_offset += sizeof(Slice) + 1; + for (size_t j = 0; j < num_entries; ++j) { + /* + * string is null_byte|length|content in OlapIndex storage + * string is in nullbyte|length|ptr in memory + * We need copy three part: nullbyte|length|content + * 1. copy null byte + * 2. copy length and content into addrs pointed by ptr + */ + + // 1. copy null_byte + memory_copy(mem_ptr, storage_ptr, null_byte); + // 2. copy length and content bool is_null = *reinterpret_cast(mem_ptr); if (!is_null) { @@ -359,7 +389,8 @@ const OLAPIndexOffset MemIndex::find(const RowCursor& k, RowCursor* helper_curso offset.offset = *it; VLOG_NOTICE << "show real offset iterator value. off=" << *it; - VLOG_NOTICE << "show result offset. 
seg_off=" << offset.segment << ", off=" << offset.offset; + VLOG_NOTICE << "show result offset. seg_off=" << offset.segment + << ", off=" << offset.offset; } catch (...) { OLAP_LOG_WARNING("fail to compare value in memindex. [cursor='%s' find_last=%d]", k.to_string().c_str(), find_last); diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 8afb0bfb66..b6a1ad3cd3 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -217,7 +217,7 @@ OLAPStatus PushHandler::_convert_v2(TabletSharedPtr cur_tablet, TabletSharedPtr // 1. init RowsetBuilder of cur_tablet for current push VLOG_NOTICE << "init rowset builder. tablet=" << cur_tablet->full_name() - << ", block_row_size=" << cur_tablet->num_rows_per_row_block(); + << ", block_row_size=" << cur_tablet->num_rows_per_row_block(); RowsetWriterContext context; context.rowset_id = StorageEngine::instance()->next_rowset_id(); context.tablet_uid = cur_tablet->tablet_uid(); @@ -327,7 +327,7 @@ OLAPStatus PushHandler::_convert_v2(TabletSharedPtr cur_tablet, TabletSharedPtr if (new_tablet != nullptr) { auto schema_change_handler = SchemaChangeHandler::instance(); res = schema_change_handler->schema_version_convert(cur_tablet, new_tablet, cur_rowset, - new_rowset); + new_rowset); if (res != OLAP_SUCCESS) { LOG(WARNING) << "failed to change schema version for delta." << "[res=" << res << " new_tablet='" << new_tablet->full_name() @@ -337,7 +337,7 @@ OLAPStatus PushHandler::_convert_v2(TabletSharedPtr cur_tablet, TabletSharedPtr } while (0); VLOG_TRACE << "convert delta file end. res=" << res << ", tablet=" << cur_tablet->full_name() - << ", processed_rows" << num_rows; + << ", processed_rows" << num_rows; return res; } @@ -430,7 +430,7 @@ OLAPStatus PushHandler::_convert(TabletSharedPtr cur_tablet, TabletSharedPtr new // 3. New RowsetBuilder to write data into rowset VLOG_NOTICE << "init rowset builder. 
tablet=" << cur_tablet->full_name() - << ", block_row_size=" << cur_tablet->num_rows_per_row_block(); + << ", block_row_size=" << cur_tablet->num_rows_per_row_block(); // 4. Init RowCursor if (OLAP_SUCCESS != (res = row.init(cur_tablet->tablet_schema()))) { @@ -488,7 +488,7 @@ OLAPStatus PushHandler::_convert(TabletSharedPtr cur_tablet, TabletSharedPtr new if (new_tablet != nullptr) { auto schema_change_handler = SchemaChangeHandler::instance(); res = schema_change_handler->schema_version_convert(cur_tablet, new_tablet, cur_rowset, - new_rowset); + new_rowset); if (res != OLAP_SUCCESS) { LOG(WARNING) << "failed to change schema version for delta." << "[res=" << res << " new_tablet='" << new_tablet->full_name() @@ -499,7 +499,7 @@ OLAPStatus PushHandler::_convert(TabletSharedPtr cur_tablet, TabletSharedPtr new SAFE_DELETE(reader); VLOG_TRACE << "convert delta file end. res=" << res << ", tablet=" << cur_tablet->full_name() - << ", processed_rows" << num_rows; + << ", processed_rows" << num_rows; return res; } @@ -610,16 +610,32 @@ OLAPStatus BinaryReader::next(RowCursor* row) { } if (column.type() == OLAP_FIELD_TYPE_VARCHAR || column.type() == OLAP_FIELD_TYPE_HLL) { // Read varchar length buffer first - if (OLAP_SUCCESS != (res = _file->read(_row_buf + offset, sizeof(StringLengthType)))) { + if (OLAP_SUCCESS != (res = _file->read(_row_buf + offset, sizeof(VarcharLengthType)))) { LOG(WARNING) << "read file for one row fail. res=" << res; return res; } // Get varchar field size + field_size = *reinterpret_cast(_row_buf + offset); + offset += sizeof(VarcharLengthType); + if (field_size > column.length() - sizeof(VarcharLengthType)) { + LOG(WARNING) << "invalid data length for VARCHAR! 
" + << "max_len=" << column.length() - sizeof(VarcharLengthType) + << ", real_len=" << field_size; + return OLAP_ERR_PUSH_INPUT_DATA_ERROR; + } + } else if (column.type() == OLAP_FIELD_TYPE_STRING) { + // Read string length buffer first + if (OLAP_SUCCESS != (res = _file->read(_row_buf + offset, sizeof(StringLengthType)))) { + LOG(WARNING) << "read file for one row fail. res=" << res; + return res; + } + + // Get string field size field_size = *reinterpret_cast(_row_buf + offset); offset += sizeof(StringLengthType); if (field_size > column.length() - sizeof(StringLengthType)) { - LOG(WARNING) << "invalid data length for VARCHAR! " + LOG(WARNING) << "invalid data length for string! " << "max_len=" << column.length() - sizeof(StringLengthType) << ", real_len=" << field_size; return OLAP_ERR_PUSH_INPUT_DATA_ERROR; @@ -635,7 +651,7 @@ OLAPStatus BinaryReader::next(RowCursor* row) { } if (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR || - column.type() == OLAP_FIELD_TYPE_HLL) { + column.type() == OLAP_FIELD_TYPE_HLL || column.type() == OLAP_FIELD_TYPE_STRING) { Slice slice(_row_buf + offset, field_size); row->set_field_content_shallow(i, reinterpret_cast(&slice)); } else { @@ -742,11 +758,22 @@ OLAPStatus LzoBinaryReader::next(RowCursor* row) { const TabletColumn& column = schema.column(i); if (column.type() == OLAP_FIELD_TYPE_VARCHAR || column.type() == OLAP_FIELD_TYPE_HLL) { // Get varchar field size + field_size = *reinterpret_cast(_row_buf + _next_row_start + offset); + offset += sizeof(VarcharLengthType); + + if (field_size > column.length() - sizeof(VarcharLengthType)) { + LOG(WARNING) << "invalid data length for VARCHAR! 
" + << "max_len=" << column.length() - sizeof(VarcharLengthType) + << ", real_len=" << field_size; + return OLAP_ERR_PUSH_INPUT_DATA_ERROR; + } + } else if (column.type() == OLAP_FIELD_TYPE_STRING) { + // Get string field size field_size = *reinterpret_cast(_row_buf + _next_row_start + offset); offset += sizeof(StringLengthType); if (field_size > column.length() - sizeof(StringLengthType)) { - LOG(WARNING) << "invalid data length for VARCHAR! " + LOG(WARNING) << "invalid data length for string! " << "max_len=" << column.length() - sizeof(StringLengthType) << ", real_len=" << field_size; return OLAP_ERR_PUSH_INPUT_DATA_ERROR; @@ -756,7 +783,7 @@ OLAPStatus LzoBinaryReader::next(RowCursor* row) { } if (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR || - column.type() == OLAP_FIELD_TYPE_HLL) { + column.type() == OLAP_FIELD_TYPE_HLL || column.type() == OLAP_FIELD_TYPE_STRING) { Slice slice(_row_buf + _next_row_start + offset, field_size); row->set_field_content_shallow(i, reinterpret_cast(&slice)); } else { diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp index 805872ab9a..477673247e 100644 --- a/be/src/olap/reader.cpp +++ b/be/src/olap/reader.cpp @@ -698,7 +698,8 @@ void Reader::_init_conditions_param(const ReaderParams& read_params) { predicate = new PREDICATE(index, value, opposite); \ break; \ } \ - case OLAP_FIELD_TYPE_VARCHAR: { \ + case OLAP_FIELD_TYPE_VARCHAR: \ + case OLAP_FIELD_TYPE_STRING: { \ StringValue value; \ int32_t length = cond.length(); \ char* buffer = reinterpret_cast(_predicate_mem_pool->allocate(length)); \ @@ -891,7 +892,8 @@ ColumnPredicate* Reader::_parse_to_predicate(const TCondition& condition, bool o } break; } - case OLAP_FIELD_TYPE_VARCHAR: { + case OLAP_FIELD_TYPE_VARCHAR: + case OLAP_FIELD_TYPE_STRING:{ phmap::flat_hash_set values; for (auto& cond_val : condition.condition_values) { StringValue value; @@ -993,7 +995,7 @@ void Reader::_init_load_bf_columns(const ReaderParams& read_params) { 
return; } FieldType type = _tablet->tablet_schema().column(max_equal_index).type(); - if (type != OLAP_FIELD_TYPE_VARCHAR || max_equal_index + 1 > _tablet->num_short_key_columns()) { + if ((type != OLAP_FIELD_TYPE_VARCHAR && type != OLAP_FIELD_TYPE_STRING)|| max_equal_index + 1 > _tablet->num_short_key_columns()) { _load_bf_columns.erase(max_equal_index); } } diff --git a/be/src/olap/row_block.cpp b/be/src/olap/row_block.cpp index 1b97d87e7d..a524732f14 100644 --- a/be/src/olap/row_block.cpp +++ b/be/src/olap/row_block.cpp @@ -90,7 +90,7 @@ void RowBlock::_compute_layout() { // All field has a nullbyte in memory if (column.type() == OLAP_FIELD_TYPE_VARCHAR || column.type() == OLAP_FIELD_TYPE_HLL || - column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_OBJECT) { + column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_OBJECT ||column.type() == OLAP_FIELD_TYPE_STRING) { // 变长部分额外计算下实际最大的字符串长度(此处length已经包括记录Length的2个字节) memory_size += sizeof(Slice) + sizeof(char); } else { diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index 94d600d300..97cbdd1530 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -108,5 +108,14 @@ std::string RowBlockRow::debug_string() const { ss << "]"; return ss.str(); } - +std::string RowBlockV2::debug_string() { + std::stringstream ss; + for (int i = 0; i < num_rows(); ++i) { + ss << row(i).debug_string(); + if (i != num_rows() - 1) { + ss << "\n"; + } + } + return ss.str(); +} } // namespace doris diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h index 08eb290819..ece8d725a0 100644 --- a/be/src/olap/row_block2.h +++ b/be/src/olap/row_block2.h @@ -103,6 +103,7 @@ public: } _delete_state = delete_state; } + std::string debug_string(); private: Schema _schema; diff --git a/be/src/olap/row_cursor.cpp b/be/src/olap/row_cursor.cpp index 020ccf7782..e2f8b5a2f4 100644 --- a/be/src/olap/row_cursor.cpp +++ b/be/src/olap/row_cursor.cpp @@ -28,11 +28,17 
@@ using std::string; using std::vector; namespace doris { -RowCursor::RowCursor() : _fixed_len(0), _variable_len(0) {} +RowCursor::RowCursor() : _fixed_len(0), _variable_len(0), _string_field_count(0), _long_text_buf(nullptr) {} RowCursor::~RowCursor() { delete[] _owned_fixed_buf; delete[] _variable_buf; + if (_string_field_count > 0 && _long_text_buf != nullptr) { + for (int i = 0; i < _string_field_count; ++i) { + free(_long_text_buf[i]); + } + free(_long_text_buf); + } } OLAPStatus RowCursor::_init(const std::vector& columns) { @@ -43,6 +49,9 @@ OLAPStatus RowCursor::_init(const std::vector& columns) { return OLAP_ERR_INIT_FAILED; } _variable_len += column_schema(cid)->get_variable_len(); + if (_schema->column(cid)->type() == OLAP_FIELD_TYPE_STRING) { + ++_string_field_count; + } } _fixed_len = _schema->schema_size(); @@ -59,7 +68,7 @@ OLAPStatus RowCursor::_init(const std::vector& columns) { OLAPStatus RowCursor::_init(const std::shared_ptr& shared_schema, const std::vector& columns) { - _schema = shared_schema; + _schema.reset(new Schema(*shared_schema.get())); return _init(columns); } @@ -80,18 +89,16 @@ OLAPStatus RowCursor::_init_scan_key(const TabletSchema& schema, const std::vect _variable_len += scan_keys[cid].length(); } else if (type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_ARRAY) { _variable_len += std::max(scan_keys[cid].length(), column.length()); + } else if (type == OLAP_FIELD_TYPE_STRING) { + ++_string_field_count; } } // variable_len for null bytes - _variable_buf = new (nothrow) char[_variable_len]; - if (_variable_buf == nullptr) { - OLAP_LOG_WARNING("Fail to malloc _variable_buf."); - return OLAP_ERR_MALLOC_ERROR; - } - memset(_variable_buf, 0, _variable_len); + RETURN_NOT_OK(_alloc_buf()); char* fixed_ptr = _fixed_buf; char* variable_ptr = _variable_buf; + char** long_text_ptr = _long_text_buf; for (auto cid : _schema->column_ids()) { const TabletColumn& column = schema.column(cid); fixed_ptr = _fixed_buf + 
_schema->column_offset(cid); @@ -106,6 +113,12 @@ OLAPStatus RowCursor::_init_scan_key(const TabletSchema& schema, const std::vect slice->data = variable_ptr; slice->size = std::max(scan_keys[cid].length(), column.length()); variable_ptr += slice->size; + } else if (type == OLAP_FIELD_TYPE_STRING) { + _schema->mutable_column(cid)->set_long_text_buf(long_text_ptr); + Slice* slice = reinterpret_cast(fixed_ptr + 1); + slice->data = *(long_text_ptr); + slice->size = DEFAULT_TEXT_LENGTH; + ++long_text_ptr; } } @@ -196,19 +209,26 @@ OLAPStatus RowCursor::init_scan_key(const TabletSchema& schema, OLAPStatus RowCursor::allocate_memory_for_string_type(const TabletSchema& schema) { // allocate memory for string type(char, varchar, hll, array) // The memory allocated in this function is used in aggregate and copy function - if (_variable_len == 0) { + if (_variable_len == 0 && _string_field_count == 0) { return OLAP_SUCCESS; } DCHECK(_variable_buf == nullptr) << "allocate memory twice"; - _variable_buf = new (nothrow) char[_variable_len]; - memset(_variable_buf, 0, _variable_len); - + RETURN_NOT_OK(_alloc_buf()); // init slice of char, varchar, hll type char* fixed_ptr = _fixed_buf; char* variable_ptr = _variable_buf; + char** long_text_ptr = _long_text_buf; for (auto cid : _schema->column_ids()) { fixed_ptr = _fixed_buf + _schema->column_offset(cid); - variable_ptr = column_schema(cid)->allocate_memory(fixed_ptr + 1, variable_ptr); + if (_schema->column(cid)->type() == OLAP_FIELD_TYPE_STRING) { + Slice* slice = reinterpret_cast(fixed_ptr + 1); + _schema->mutable_column(cid)->set_long_text_buf(long_text_ptr); + slice->data = *(long_text_ptr); + slice->size = DEFAULT_TEXT_LENGTH; + ++long_text_ptr; + } else if (_variable_len > 0){ + variable_ptr = column_schema(cid)->allocate_memory(fixed_ptr + 1, variable_ptr); + } } return OLAP_SUCCESS; } @@ -302,5 +322,29 @@ std::string RowCursor::to_string() const { return result; } +OLAPStatus RowCursor::_alloc_buf() { + // variable_len 
for null bytes + _variable_buf = new (nothrow) char[_variable_len]; + if (_variable_buf == nullptr) { + OLAP_LOG_WARNING("Fail to malloc _variable_buf."); + return OLAP_ERR_MALLOC_ERROR; + } + memset(_variable_buf, 0, _variable_len); + if (_string_field_count > 0) { + _long_text_buf = (char**)malloc(_string_field_count * sizeof(char*)); + if (_long_text_buf == nullptr) { + OLAP_LOG_WARNING("Fail to malloc _long_text_buf."); + return OLAP_ERR_MALLOC_ERROR; + } + for (int i = 0; i < _string_field_count; ++i) { + _long_text_buf[i] = (char*)malloc(DEFAULT_TEXT_LENGTH * sizeof(char)); + if (_long_text_buf[i] == nullptr) { + OLAP_LOG_WARNING("Fail to malloc _long_text_buf."); + return OLAP_ERR_MALLOC_ERROR; + } + } + } + return OLAP_SUCCESS; +} } // namespace doris diff --git a/be/src/olap/row_cursor.h b/be/src/olap/row_cursor.h index 222a4f985c..bfffe3cbc9 100644 --- a/be/src/olap/row_cursor.h +++ b/be/src/olap/row_cursor.h @@ -34,6 +34,8 @@ class Field; // 代理一行数据的操作 class RowCursor { public: + static const int DEFAULT_TEXT_LENGTH = 128; + RowCursor(); // 遍历销毁field指针 @@ -152,10 +154,11 @@ private: const std::vector& columns); // common init function OLAPStatus _init(const std::vector& schema, const std::vector& columns); + inline OLAPStatus _alloc_buf(); OLAPStatus _init_scan_key(const TabletSchema& schema, const std::vector& scan_keys); - std::shared_ptr _schema; + std::unique_ptr _schema; char* _fixed_buf = nullptr; // point to fixed buf size_t _fixed_len; @@ -163,6 +166,8 @@ private: char* _variable_buf = nullptr; size_t _variable_len; + size_t _string_field_count; + char** _long_text_buf; DISALLOW_COPY_AND_ASSIGN(RowCursor); }; diff --git a/be/src/olap/rowset/column_data.cpp b/be/src/olap/rowset/column_data.cpp index 03dc867e79..4f18a9cbb9 100644 --- a/be/src/olap/rowset/column_data.cpp +++ b/be/src/olap/rowset/column_data.cpp @@ -282,7 +282,7 @@ OLAPStatus ColumnData::_seek_to_row(const RowCursor& key, bool find_last_key, bo const TabletSchema& tablet_schema = 
_segment_group->get_tablet_schema(); FieldType type = tablet_schema.column(key.field_count() - 1).type(); if (key.field_count() > _segment_group->get_num_short_key_columns() || - OLAP_FIELD_TYPE_VARCHAR == type) { + OLAP_FIELD_TYPE_VARCHAR == type || OLAP_FIELD_TYPE_STRING == type) { res = _find_position_by_full_key(key, find_last_key, &position); } else { res = _find_position_by_short_key(key, find_last_key, &position); diff --git a/be/src/olap/rowset/column_reader.cpp b/be/src/olap/rowset/column_reader.cpp index bd11999f49..25fc9e0b37 100644 --- a/be/src/olap/rowset/column_reader.cpp +++ b/be/src/olap/rowset/column_reader.cpp @@ -626,7 +626,8 @@ ColumnReader* ColumnReader::create(uint32_t column_id, const std::vector( column_id, column_unique_id, column.length(), dictionary_size); diff --git a/be/src/olap/rowset/column_reader.h b/be/src/olap/rowset/column_reader.h index 9df533deb5..f0eeffd29c 100644 --- a/be/src/olap/rowset/column_reader.h +++ b/be/src/olap/rowset/column_reader.h @@ -333,7 +333,8 @@ public: } case OLAP_FIELD_TYPE_VARCHAR: case OLAP_FIELD_TYPE_OBJECT: - case OLAP_FIELD_TYPE_HLL: { + case OLAP_FIELD_TYPE_HLL: + case OLAP_FIELD_TYPE_STRING: { _values = reinterpret_cast(mem_pool->allocate(size * sizeof(Slice))); int32_t length = _default_value.length(); char* string_buffer = reinterpret_cast(mem_pool->allocate(size * length)); diff --git a/be/src/olap/rowset/column_writer.cpp b/be/src/olap/rowset/column_writer.cpp index ce66fcead9..bdaf575e1b 100644 --- a/be/src/olap/rowset/column_writer.cpp +++ b/be/src/olap/rowset/column_writer.cpp @@ -108,7 +108,8 @@ ColumnWriter* ColumnWriter::create(uint32_t column_id, const TabletSchema& schem } case OLAP_FIELD_TYPE_VARCHAR: case OLAP_FIELD_TYPE_OBJECT: - case OLAP_FIELD_TYPE_HLL: { + case OLAP_FIELD_TYPE_HLL: + case OLAP_FIELD_TYPE_STRING: { column_writer = new (std::nothrow) VarStringColumnWriter(column_id, stream_factory, column, num_rows_per_row_block, bf_fpp); break; @@ -237,7 +238,8 @@ OLAPStatus 
ColumnWriter::write(RowCursor* row_cursor) { if (!is_null) { if (_column.type() == OLAP_FIELD_TYPE_CHAR || _column.type() == OLAP_FIELD_TYPE_VARCHAR || - _column.type() == OLAP_FIELD_TYPE_HLL) { + _column.type() == OLAP_FIELD_TYPE_HLL || + _column.type() == OLAP_FIELD_TYPE_STRING) { Slice* slice = reinterpret_cast(buf); _bf->add_bytes(slice->data, slice->size); } else { diff --git a/be/src/olap/rowset/segment_group.cpp b/be/src/olap/rowset/segment_group.cpp index 5cf9dc5623..9f7b79031e 100644 --- a/be/src/olap/rowset/segment_group.cpp +++ b/be/src/olap/rowset/segment_group.cpp @@ -94,7 +94,7 @@ SegmentGroup::SegmentGroup(int64_t tablet_id, const RowsetId& rowset_id, const T const TabletColumn& column = _schema->column(i); _short_key_columns.push_back(column); _short_key_length += column.index_length() + 1; // 1 for null byte - if (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR) { + if (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR || column.type() == OLAP_FIELD_TYPE_STRING) { _new_short_key_length += sizeof(Slice) + 1; } else { _new_short_key_length += column.index_length() + 1; @@ -132,7 +132,7 @@ SegmentGroup::SegmentGroup(int64_t tablet_id, const RowsetId& rowset_id, const T const TabletColumn& column = _schema->column(i); _short_key_columns.push_back(column); _short_key_length += column.index_length() + 1; // 1 for null byte - if (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR) { + if (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR || column.type() == OLAP_FIELD_TYPE_STRING) { _new_short_key_length += sizeof(Slice) + 1; } else { _new_short_key_length += column.index_length() + 1; diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp index a65cdf2130..6fae7eb976 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp +++ 
b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp @@ -244,6 +244,7 @@ Status BinaryDictPageDecoder::next_batch(size_t* n, ColumnBlockView* dst) { return Status::OK(); } Slice* out = reinterpret_cast(dst->data()); + _batch->resize(*n); ColumnBlock column_block(_batch.get(), dst->column_block()->pool()); diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp b/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp index 2639b0b9e1..babfdb38c6 100644 --- a/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp @@ -217,6 +217,9 @@ Status BitmapIndexWriter::create(const TypeInfo* typeinfo, case OLAP_FIELD_TYPE_VARCHAR: res->reset(new BitmapIndexWriterImpl(typeinfo)); break; + case OLAP_FIELD_TYPE_STRING: + res->reset(new BitmapIndexWriterImpl(typeinfo)); + break; case OLAP_FIELD_TYPE_DATE: res->reset(new BitmapIndexWriterImpl(typeinfo)); break; diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp index 401efa4801..d45b2deb27 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp @@ -155,7 +155,7 @@ public: private: // supported slice types are: OLAP_FIELD_TYPE_CHAR|OLAP_FIELD_TYPE_VARCHAR static constexpr bool _is_slice_type() { - return field_type == OLAP_FIELD_TYPE_VARCHAR || field_type == OLAP_FIELD_TYPE_CHAR; + return field_type == OLAP_FIELD_TYPE_VARCHAR || field_type == OLAP_FIELD_TYPE_CHAR || field_type == OLAP_FIELD_TYPE_STRING; } static constexpr bool _is_int128() { return field_type == OLAP_FIELD_TYPE_LARGEINT; } @@ -202,6 +202,9 @@ Status BloomFilterIndexWriter::create(const BloomFilterOptions& bf_options, case OLAP_FIELD_TYPE_VARCHAR: res->reset(new BloomFilterIndexWriterImpl(bf_options, typeinfo)); break; + case OLAP_FIELD_TYPE_STRING: + res->reset(new BloomFilterIndexWriterImpl(bf_options, typeinfo)); + break; case 
OLAP_FIELD_TYPE_DATE: res->reset(new BloomFilterIndexWriterImpl(bf_options, typeinfo)); break; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 0eda8f373d..bbfba19172 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -679,7 +679,8 @@ Status DefaultValueColumnIterator::init(const ColumnIteratorOptions& opts) { ((Slice*)_mem_value)->data = string_buffer; } else if (_type_info->type() == OLAP_FIELD_TYPE_VARCHAR || _type_info->type() == OLAP_FIELD_TYPE_HLL || - _type_info->type() == OLAP_FIELD_TYPE_OBJECT) { + _type_info->type() == OLAP_FIELD_TYPE_OBJECT || + _type_info->type() == OLAP_FIELD_TYPE_STRING) { int32_t length = _default_value.length(); char* string_buffer = reinterpret_cast(_pool->allocate(length)); memory_copy(string_buffer, _default_value.c_str(), length); diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp b/be/src/olap/rowset/segment_v2/encoding_info.cpp index 322ed7f507..e4ce43cf77 100644 --- a/be/src/olap/rowset/segment_v2/encoding_info.cpp +++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp @@ -255,6 +255,10 @@ EncodingInfoResolver::EncodingInfoResolver() { _add_map(); _add_map(); + _add_map(); + _add_map(); + _add_map(); + _add_map(); _add_map(); _add_map(); diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h index 0a7c8e9919..39e3c7914c 100644 --- a/be/src/olap/schema.h +++ b/be/src/olap/schema.h @@ -102,6 +102,8 @@ public: const Field* column(ColumnId cid) const { return _cols[cid]; } + Field* mutable_column(ColumnId cid) const { return _cols[cid]; } + size_t num_key_columns() const { return _num_key_columns; } size_t schema_size() const { return _schema_size; } diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index b5d538249d..04e5070f11 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -244,6 +244,7 @@ 
ConvertTypeResolver::ConvertTypeResolver() { add_convert_type_mapping(); add_convert_type_mapping(); add_convert_type_mapping(); + add_convert_type_mapping(); // to varchar type add_convert_type_mapping(); @@ -255,6 +256,30 @@ ConvertTypeResolver::ConvertTypeResolver() { add_convert_type_mapping(); add_convert_type_mapping(); add_convert_type_mapping(); + add_convert_type_mapping(); + + // from string + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + + // to string + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); + add_convert_type_mapping(); add_convert_type_mapping(); @@ -355,7 +380,8 @@ bool hll_hash(RowCursor* read_helper, RowCursor* write_helper, const TabletColum slice->size = p + 1; } - case OLAP_FIELD_TYPE_VARCHAR: { + case OLAP_FIELD_TYPE_VARCHAR: + case OLAP_FIELD_TYPE_STRING: { Slice slice = *reinterpret_cast(read_helper->cell_ptr(ref_field_idx)); hash_value = HashUtil::murmur_hash64A(slice.data, slice.size, HashUtil::MURMUR_SEED); break; diff --git a/be/src/olap/stream_index_common.cpp b/be/src/olap/stream_index_common.cpp index d01f070838..e831904e19 100644 --- a/be/src/olap/stream_index_common.cpp +++ b/be/src/olap/stream_index_common.cpp @@ -36,7 +36,8 @@ OLAPStatus ColumnStatistics::init(const FieldType& type, bool null_supported) { _null_supported = null_supported; if (type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_VARCHAR || - type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT) { + type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT || + type == OLAP_FIELD_TYPE_STRING) { _ignored = 
true; } else { // 当数据类型为 String和varchar或是未知类型时,实际上不会有统计信息。 diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index a2a73a9980..824cdaa16b 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -156,7 +156,8 @@ void TabletMeta::_init_column_from_tcolumn(uint32_t unique_id, const TColumn& tc tcolumn.column_type.len); column->set_length(length); column->set_index_length(length); - if (tcolumn.column_type.type == TPrimitiveType::VARCHAR) { + if (tcolumn.column_type.type == TPrimitiveType::VARCHAR || + tcolumn.column_type.type == TPrimitiveType::STRING) { if (!tcolumn.column_type.__isset.index_len) { column->set_index_length(10); } else { diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 09f456d637..dd29b73002 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -60,6 +60,8 @@ FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) { type = OLAP_FIELD_TYPE_DECIMAL; } else if (0 == upper_type_str.compare(0, 7, "VARCHAR")) { type = OLAP_FIELD_TYPE_VARCHAR; + } else if (0 == upper_type_str.compare("STRING")) { + type = OLAP_FIELD_TYPE_STRING; } else if (0 == upper_type_str.compare("BOOLEAN")) { type = OLAP_FIELD_TYPE_BOOL; } else if (0 == upper_type_str.compare(0, 3, "HLL")) { @@ -164,6 +166,9 @@ std::string TabletColumn::get_string_by_field_type(FieldType type) { case OLAP_FIELD_TYPE_VARCHAR: return "VARCHAR"; + case OLAP_FIELD_TYPE_STRING: + return "STRING"; + case OLAP_FIELD_TYPE_BOOL: return "BOOLEAN"; @@ -245,6 +250,8 @@ uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint3 return string_length; case TPrimitiveType::VARCHAR: case TPrimitiveType::HLL: + return string_length + sizeof(OLAP_VARCHAR_MAX_LENGTH); + case TPrimitiveType::STRING: return string_length + sizeof(OLAP_STRING_MAX_LENGTH); case TPrimitiveType::ARRAY: return OLAP_ARRAY_MAX_LENGTH; diff --git a/be/src/olap/types.cpp b/be/src/olap/types.cpp index 
b26e069aa0..3778c66a6a 100644 --- a/be/src/olap/types.cpp +++ b/be/src/olap/types.cpp @@ -79,6 +79,7 @@ ScalarTypeInfoResolver::ScalarTypeInfoResolver() { add_mapping(); add_mapping(); add_mapping(); + add_mapping(); add_mapping(); add_mapping(); } @@ -139,6 +140,7 @@ ArrayTypeInfoResolver::ArrayTypeInfoResolver() { add_mapping(); add_mapping(); add_mapping(); + add_mapping(); } // equal to get_scalar_type_info diff --git a/be/src/olap/types.h b/be/src/olap/types.h index 0dc5a9a838..d1ededb4bc 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -454,6 +454,10 @@ struct CppTypeTraits { using CppType = Slice; }; template <> +struct CppTypeTraits { + using CppType = Slice; +}; +template <> struct CppTypeTraits { using CppType = Slice; }; @@ -591,7 +595,8 @@ struct NumericFieldtypeTraits : public BaseFieldtypeTraits { static OLAPStatus convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool) { - if (src_type->type() == OLAP_FIELD_TYPE_VARCHAR) { + if (src_type->type() == OLAP_FIELD_TYPE_VARCHAR || + src_type->type() == OLAP_FIELD_TYPE_STRING) { return arithmetic_convert_from_varchar(dest, src); } else if (src_type->type() == OLAP_FIELD_TYPE_CHAR) { return numeric_convert_from_char(dest, src); @@ -866,7 +871,8 @@ struct FieldTypeTraits : public BaseFieldtypeTraitstype() == OLAP_FIELD_TYPE_VARCHAR || - src_type->type() == OLAP_FIELD_TYPE_CHAR) { + src_type->type() == OLAP_FIELD_TYPE_CHAR || + src_type->type() == OLAP_FIELD_TYPE_STRING) { if (src_type->type() == OLAP_FIELD_TYPE_CHAR) { prepare_char_before_convert(src); } @@ -969,9 +975,9 @@ struct FieldTypeTraits : public BaseFieldtypeTraits OLAP_STRING_MAX_LENGTH) { + if (value_len > OLAP_VARCHAR_MAX_LENGTH) { LOG(WARNING) << "the len of value string is too long, len=" << value_len - << ", max_len=" << OLAP_STRING_MAX_LENGTH; + << ", max_len=" << OLAP_VARCHAR_MAX_LENGTH; return OLAP_ERR_INPUT_PARAMETER_ERROR; } @@ -1040,6 +1046,55 @@ struct FieldTypeTraits : public 
BaseFieldtypeTraits struct FieldTypeTraits : public FieldTypeTraits { + static OLAPStatus from_string(void* buf, const std::string& scan_key) { + size_t value_len = scan_key.length(); + if (value_len > OLAP_VARCHAR_MAX_LENGTH) { + LOG(WARNING) << "the len of value string is too long, len=" << value_len + << ", max_len=" << OLAP_VARCHAR_MAX_LENGTH; + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + + auto slice = reinterpret_cast(buf); + memory_copy(slice->data, scan_key.c_str(), value_len); + slice->size = value_len; + return OLAP_SUCCESS; + } + + static OLAPStatus convert_from(void* dest, const void* src, const TypeInfo* src_type, + MemPool* mem_pool) { + switch (src_type->type()) { + case OLAP_FIELD_TYPE_TINYINT: + case OLAP_FIELD_TYPE_SMALLINT: + case OLAP_FIELD_TYPE_INT: + case OLAP_FIELD_TYPE_BIGINT: + case OLAP_FIELD_TYPE_LARGEINT: + case OLAP_FIELD_TYPE_FLOAT: + case OLAP_FIELD_TYPE_DOUBLE: + case OLAP_FIELD_TYPE_DECIMAL: { + auto result = src_type->to_string(src); + auto slice = reinterpret_cast(dest); + slice->data = reinterpret_cast(mem_pool->allocate(result.size())); + memcpy(slice->data, result.c_str(), result.size()); + slice->size = result.size(); + return OLAP_SUCCESS; + } + case OLAP_FIELD_TYPE_CHAR: + prepare_char_before_convert(src); + deep_copy(dest, src, mem_pool); + return OLAP_SUCCESS; + default: + return OLAP_ERR_INVALID_SCHEMA; + } + } + + static void set_to_min(void* buf) { + auto slice = reinterpret_cast(buf); + slice->size = 0; + } +}; + +template <> +struct FieldTypeTraits : public FieldTypeTraits { static OLAPStatus from_string(void* buf, const std::string& scan_key) { size_t value_len = scan_key.length(); if (value_len > OLAP_STRING_MAX_LENGTH) { @@ -1056,24 +1111,30 @@ struct FieldTypeTraits : public FieldTypeTraitstype() == OLAP_FIELD_TYPE_TINYINT || - src_type->type() == OLAP_FIELD_TYPE_SMALLINT || - src_type->type() == OLAP_FIELD_TYPE_INT || src_type->type() == OLAP_FIELD_TYPE_BIGINT || - src_type->type() == OLAP_FIELD_TYPE_LARGEINT || 
- src_type->type() == OLAP_FIELD_TYPE_FLOAT || - src_type->type() == OLAP_FIELD_TYPE_DOUBLE || - src_type->type() == OLAP_FIELD_TYPE_DECIMAL) { + switch (src_type->type()) { + case OLAP_FIELD_TYPE_TINYINT: + case OLAP_FIELD_TYPE_SMALLINT: + case OLAP_FIELD_TYPE_INT: + case OLAP_FIELD_TYPE_BIGINT: + case OLAP_FIELD_TYPE_LARGEINT: + case OLAP_FIELD_TYPE_FLOAT: + case OLAP_FIELD_TYPE_DOUBLE: + case OLAP_FIELD_TYPE_DECIMAL: { auto result = src_type->to_string(src); auto slice = reinterpret_cast(dest); slice->data = reinterpret_cast(mem_pool->allocate(result.size())); memcpy(slice->data, result.c_str(), result.size()); slice->size = result.size(); return OLAP_SUCCESS; - } else if (src_type->type() == OLAP_FIELD_TYPE_CHAR) { - prepare_char_before_convert(src); - deep_copy(dest, src, mem_pool); } - return OLAP_ERR_INVALID_SCHEMA; + case OLAP_FIELD_TYPE_CHAR: + prepare_char_before_convert(src); + case OLAP_FIELD_TYPE_VARCHAR: + deep_copy(dest, src, mem_pool); + return OLAP_SUCCESS; + default: + return OLAP_ERR_INVALID_SCHEMA; + } } static void set_to_min(void* buf) { diff --git a/be/src/olap/wrapper_field.cpp b/be/src/olap/wrapper_field.cpp index 4d1b9d4602..61f5f88bb0 100644 --- a/be/src/olap/wrapper_field.cpp +++ b/be/src/olap/wrapper_field.cpp @@ -24,10 +24,12 @@ const size_t DEFAULT_STRING_LENGTH = 50; WrapperField* WrapperField::create(const TabletColumn& column, uint32_t len) { bool is_string_type = (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR || - column.type() == OLAP_FIELD_TYPE_HLL || column.type() == OLAP_FIELD_TYPE_OBJECT); - if (is_string_type && len > OLAP_STRING_MAX_LENGTH) { + column.type() == OLAP_FIELD_TYPE_HLL || column.type() == OLAP_FIELD_TYPE_OBJECT || + column.type() == OLAP_FIELD_TYPE_STRING); + size_t max_length = column.type() == OLAP_FIELD_TYPE_STRING ? 
OLAP_STRING_MAX_LENGTH : OLAP_VARCHAR_MAX_LENGTH; + if (is_string_type && len > max_length) { OLAP_LOG_WARNING("length of string parameter is too long[len=%lu, max_len=%lu].", len, - OLAP_STRING_MAX_LENGTH); + max_length); return nullptr; } @@ -40,6 +42,12 @@ WrapperField* WrapperField::create(const TabletColumn& column, uint32_t len) { if (column.type() == OLAP_FIELD_TYPE_CHAR) { variable_len = std::max(len, (uint32_t)(column.length())); } else if (column.type() == OLAP_FIELD_TYPE_VARCHAR || column.type() == OLAP_FIELD_TYPE_HLL) { + // column.length is the serialized varchar length + // the first sizeof(VarcharLengthType) bytes is the length of varchar + // variable_len is the real length of varchar + variable_len = + std::max(len, static_cast(column.length() - sizeof(VarcharLengthType))); + } else if (column.type() == OLAP_FIELD_TYPE_STRING) { // column.length is the serialized varchar length // the first sizeof(StringLengthType) bytes is the length of varchar // variable_len is the real length of varchar @@ -59,7 +67,8 @@ WrapperField* WrapperField::create_by_type(const FieldType& type, int32_t var_le return nullptr; } bool is_string_type = (type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_VARCHAR || - type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT); + type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT || + type == OLAP_FIELD_TYPE_STRING); auto wrapper = new WrapperField(rep, var_length, is_string_type); return wrapper; } diff --git a/be/src/runtime/dpp_sink.cpp b/be/src/runtime/dpp_sink.cpp index 52e6a59d24..bfcabf256f 100644 --- a/be/src/runtime/dpp_sink.cpp +++ b/be/src/runtime/dpp_sink.cpp @@ -484,13 +484,14 @@ Status Translator::create_value_updaters() { break; } case TYPE_CHAR: - case TYPE_VARCHAR: { + case TYPE_VARCHAR: + case TYPE_STRING: { switch (_rollup_schema.value_ops()[i]) { case TAggregationType::MAX: case TAggregationType::MIN: case TAggregationType::SUM: return Status::InternalError( - "Unsupported max/min/sum 
operation on char/varchar column."); + "Unsupported max/min/sum operation on char/varchar/string column."); default: // Only replace has meaning _value_updaters.push_back(fake_update); diff --git a/be/src/runtime/dpp_writer.cpp b/be/src/runtime/dpp_writer.cpp index a61facf1e2..b2a4457441 100644 --- a/be/src/runtime/dpp_writer.cpp +++ b/be/src/runtime/dpp_writer.cpp @@ -178,6 +178,7 @@ Status DppWriter::append_one_row(TupleRow* row) { } case TYPE_VARCHAR: { case TYPE_HLL: + case TYPE_STRING: const StringValue* str_val = (const StringValue*)(item); if (UNLIKELY(str_val->ptr == nullptr && str_val->len != 0)) { return Status::InternalError("String value ptr is null"); diff --git a/be/src/runtime/export_sink.cpp b/be/src/runtime/export_sink.cpp index 1aaaade101..28392a277b 100644 --- a/be/src/runtime/export_sink.cpp +++ b/be/src/runtime/export_sink.cpp @@ -176,7 +176,8 @@ Status ExportSink::gen_row_buffer(TupleRow* row, std::stringstream* ss) { break; } case TYPE_VARCHAR: - case TYPE_CHAR: { + case TYPE_CHAR: + case TYPE_STRING: { const StringValue* string_val = (const StringValue*)(item); if (string_val->ptr == NULL) { diff --git a/be/src/runtime/file_result_writer.cpp b/be/src/runtime/file_result_writer.cpp index 197af11841..9377075c11 100644 --- a/be/src/runtime/file_result_writer.cpp +++ b/be/src/runtime/file_result_writer.cpp @@ -259,7 +259,8 @@ Status FileResultWriter::_write_one_row_as_csv(TupleRow* row) { break; } case TYPE_VARCHAR: - case TYPE_CHAR: { + case TYPE_CHAR: + case TYPE_STRING: { const StringValue* string_val = (const StringValue*)(item); if (string_val->ptr == NULL) { if (string_val->len != 0) { diff --git a/be/src/runtime/fold_constant_mgr.cpp b/be/src/runtime/fold_constant_mgr.cpp index 6257205a92..6c39fc533e 100644 --- a/be/src/runtime/fold_constant_mgr.cpp +++ b/be/src/runtime/fold_constant_mgr.cpp @@ -182,6 +182,7 @@ string FoldConstantMgr::get_result(void* src, PrimitiveType slot_type){ } case TYPE_CHAR: case TYPE_VARCHAR: + case TYPE_STRING: 
case TYPE_HLL: case TYPE_OBJECT: { return (reinterpret_cast(src))->to_string(); diff --git a/be/src/runtime/mysql_result_writer.cpp b/be/src/runtime/mysql_result_writer.cpp index 444e0dbfa7..74010cf8f7 100644 --- a/be/src/runtime/mysql_result_writer.cpp +++ b/be/src/runtime/mysql_result_writer.cpp @@ -126,7 +126,8 @@ int MysqlResultWriter::_add_row_value(int index, const TypeDescriptor& type, voi } case TYPE_VARCHAR: - case TYPE_CHAR: { + case TYPE_CHAR: + case TYPE_STRING: { const StringValue* string_val = (const StringValue*)(item); if (string_val->ptr == NULL) { diff --git a/be/src/runtime/mysql_table_writer.cpp b/be/src/runtime/mysql_table_writer.cpp index 620e3f5eb0..6365ad764a 100644 --- a/be/src/runtime/mysql_table_writer.cpp +++ b/be/src/runtime/mysql_table_writer.cpp @@ -118,7 +118,8 @@ Status MysqlTableWriter::insert_row(TupleRow* row) { break; } case TYPE_VARCHAR: - case TYPE_CHAR: { + case TYPE_CHAR: + case TYPE_STRING: { const StringValue* string_val = (const StringValue*)(item); if (string_val->ptr == NULL) { diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index 320e58ba0f..53df75bc03 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -78,6 +78,9 @@ PrimitiveType thrift_to_type(TPrimitiveType::type ttype) { case TPrimitiveType::VARCHAR: return TYPE_VARCHAR; + case TPrimitiveType::STRING: + return TYPE_STRING; + case TPrimitiveType::BINARY: return TYPE_BINARY; @@ -145,6 +148,9 @@ TPrimitiveType::type to_thrift(PrimitiveType ptype) { case TYPE_VARCHAR: return TPrimitiveType::VARCHAR; + case TYPE_STRING: + return TPrimitiveType::STRING; + case TYPE_BINARY: return TPrimitiveType::BINARY; @@ -212,6 +218,9 @@ std::string type_to_string(PrimitiveType t) { case TYPE_VARCHAR: return "VARCHAR"; + case TYPE_STRING: + return "STRING"; + case TYPE_BINARY: return "BINARY"; @@ -280,6 +289,9 @@ std::string type_to_odbc_string(PrimitiveType t) { case TYPE_VARCHAR: return "string"; + case 
TYPE_STRING: + return "string"; + case TYPE_BINARY: return "binary"; @@ -336,6 +348,7 @@ int get_slot_size(PrimitiveType type) { case TYPE_HLL: case TYPE_CHAR: case TYPE_VARCHAR: + case TYPE_STRING: return sizeof(StringValue); case TYPE_ARRAY: return sizeof(CollectionValue); diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h index ebd12b5602..cdafb2ced8 100644 --- a/be/src/runtime/primitive_type.h +++ b/be/src/runtime/primitive_type.h @@ -55,8 +55,9 @@ enum PrimitiveType { TYPE_HLL, /* 19 */ TYPE_DECIMALV2, /* 20 */ - TYPE_TIME, /* 21 */ - TYPE_OBJECT, + TYPE_TIME, /* 21 */ + TYPE_OBJECT, /* 22 */ + TYPE_STRING, /* 23 */ }; inline bool is_enumeration_type(PrimitiveType type) { @@ -66,6 +67,7 @@ inline bool is_enumeration_type(PrimitiveType type) { case TYPE_NULL: case TYPE_CHAR: case TYPE_VARCHAR: + case TYPE_STRING: case TYPE_DATETIME: case TYPE_DECIMALV2: case TYPE_BOOLEAN: @@ -88,13 +90,13 @@ inline bool is_enumeration_type(PrimitiveType type) { return false; } -// inline bool is_date_type(PrimitiveType type) { -// return type == TYPE_DATETIME || type == TYPE_DATE; -// } -// -// inline bool is_string_type(PrimitiveType type) { -// return type == TYPE_CHAR || type == TYPE_VARCHAR; -// } +inline bool is_date_type(PrimitiveType type) { + return type == TYPE_DATETIME || type == TYPE_DATE; +} + +inline bool is_string_type(PrimitiveType type) { + return type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_STRING; +} // Returns the byte size of 'type' Returns 0 for variable length types. 
inline int get_byte_size(PrimitiveType type) { @@ -102,6 +104,7 @@ inline int get_byte_size(PrimitiveType type) { case TYPE_OBJECT: case TYPE_HLL: case TYPE_VARCHAR: + case TYPE_STRING: case TYPE_ARRAY: return 0; @@ -141,6 +144,7 @@ inline int get_real_byte_size(PrimitiveType type) { case TYPE_OBJECT: case TYPE_HLL: case TYPE_VARCHAR: + case TYPE_STRING: case TYPE_ARRAY: return 0; @@ -181,15 +185,21 @@ int get_slot_size(PrimitiveType type); inline bool is_type_compatible(PrimitiveType lhs, PrimitiveType rhs) { if (lhs == TYPE_VARCHAR) { - return rhs == TYPE_CHAR || rhs == TYPE_VARCHAR || rhs == TYPE_HLL || rhs == TYPE_OBJECT; + return rhs == TYPE_CHAR || rhs == TYPE_VARCHAR || rhs == TYPE_HLL || rhs == TYPE_OBJECT || + rhs == TYPE_STRING; } if (lhs == TYPE_OBJECT) { - return rhs == TYPE_VARCHAR || rhs == TYPE_OBJECT; + return rhs == TYPE_VARCHAR || rhs == TYPE_OBJECT || rhs == TYPE_STRING; } if (lhs == TYPE_CHAR || lhs == TYPE_HLL) { - return rhs == TYPE_CHAR || rhs == TYPE_VARCHAR || rhs == TYPE_HLL; + return rhs == TYPE_CHAR || rhs == TYPE_VARCHAR || rhs == TYPE_HLL || rhs == TYPE_STRING; + } + + if (lhs == TYPE_STRING) { + return rhs == TYPE_CHAR || rhs == TYPE_VARCHAR || rhs == TYPE_HLL || rhs == TYPE_OBJECT || + rhs == TYPE_STRING; } return lhs == rhs; @@ -260,6 +270,11 @@ struct PrimitiveTypeTraits { using CppType = StringValue; }; +template <> +struct PrimitiveTypeTraits { + using CppType = StringValue; +}; + } // namespace doris #endif diff --git a/be/src/runtime/raw_value.cpp b/be/src/runtime/raw_value.cpp index d83e9e5752..e9778bbda5 100644 --- a/be/src/runtime/raw_value.cpp +++ b/be/src/runtime/raw_value.cpp @@ -72,6 +72,7 @@ void RawValue::print_value_as_bytes(const void* value, const TypeDescriptor& typ case TYPE_VARCHAR: case TYPE_HLL: case TYPE_CHAR: + case TYPE_STRING: string_val = reinterpret_cast(value); stream->write(static_cast(string_val->ptr), string_val->len); return; @@ -148,6 +149,7 @@ void RawValue::print_value(const void* value, const 
TypeDescriptor& type, int sc case TYPE_HLL: case TYPE_CHAR: case TYPE_VARCHAR: + case TYPE_STRING: string_val = reinterpret_cast(value); tmp.assign(static_cast(string_val->ptr), string_val->len); *stream << tmp; @@ -213,10 +215,11 @@ void RawValue::print_value(const void* value, const TypeDescriptor& type, int sc case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_OBJECT: - case TYPE_HLL: { + case TYPE_HLL: + case TYPE_STRING: { string_val = reinterpret_cast(value); std::stringstream ss; - ss << "ptr:" << (void*)string_val->ptr << " len" << string_val->len; + ss << "ptr:" << (void*)string_val->ptr << " len:" << string_val->len; tmp = ss.str(); if (string_val->len <= 1000) { tmp.assign(static_cast(string_val->ptr), string_val->len); @@ -294,7 +297,8 @@ void RawValue::write(const void* value, void* dst, const TypeDescriptor& type, M case TYPE_OBJECT: case TYPE_HLL: case TYPE_VARCHAR: - case TYPE_CHAR: { + case TYPE_CHAR: + case TYPE_STRING: { const StringValue* src = reinterpret_cast(value); StringValue* dest = reinterpret_cast(dst); dest->len = src->len; @@ -373,7 +377,8 @@ void RawValue::write(const void* value, const TypeDescriptor& type, void* dst, u *reinterpret_cast(dst) = *reinterpret_cast(value); break; case TYPE_VARCHAR: - case TYPE_CHAR: { + case TYPE_CHAR: + case TYPE_STRING: { DCHECK(buf != NULL); const StringValue* src = reinterpret_cast(value); StringValue* dest = reinterpret_cast(dst); diff --git a/be/src/runtime/raw_value.h b/be/src/runtime/raw_value.h index 53e2a7857a..1df91222a8 100644 --- a/be/src/runtime/raw_value.h +++ b/be/src/runtime/raw_value.h @@ -144,6 +144,7 @@ inline bool RawValue::lt(const void* v1, const void* v2, const TypeDescriptor& t case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: + case TYPE_STRING: string_value1 = reinterpret_cast(v1); string_value2 = reinterpret_cast(v2); return string_value1->lt(*string_value2); @@ -195,6 +196,7 @@ inline bool RawValue::eq(const void* v1, const void* v2, const TypeDescriptor& t case TYPE_CHAR: case 
TYPE_VARCHAR: case TYPE_HLL: + case TYPE_STRING: string_value1 = reinterpret_cast(v1); string_value2 = reinterpret_cast(v2); return string_value1->eq(*string_value2); @@ -232,7 +234,8 @@ inline uint32_t RawValue::get_hash_value(const void* v, const PrimitiveType& typ switch (type) { case TYPE_VARCHAR: case TYPE_CHAR: - case TYPE_HLL: { + case TYPE_HLL: + case TYPE_STRING: { const StringValue* string_value = reinterpret_cast(v); return HashUtil::hash(string_value->ptr, string_value->len, seed); } @@ -287,7 +290,8 @@ inline uint32_t RawValue::get_hash_value_fvn(const void* v, const PrimitiveType& switch (type) { case TYPE_VARCHAR: case TYPE_CHAR: - case TYPE_HLL: { + case TYPE_HLL: + case TYPE_STRING: { const StringValue* string_value = reinterpret_cast(v); return HashUtil::fnv_hash(string_value->ptr, string_value->len, seed); } @@ -342,7 +346,8 @@ inline uint32_t RawValue::zlib_crc32(const void* v, const TypeDescriptor& type, switch (type.type) { case TYPE_VARCHAR: - case TYPE_HLL: { + case TYPE_HLL: + case TYPE_STRING: { const StringValue* string_value = reinterpret_cast(v); return HashUtil::zlib_crc_hash(string_value->ptr, string_value->len, seed); } diff --git a/be/src/runtime/raw_value_ir.cpp b/be/src/runtime/raw_value_ir.cpp index 56ab7f5f4d..944c3fc605 100644 --- a/be/src/runtime/raw_value_ir.cpp +++ b/be/src/runtime/raw_value_ir.cpp @@ -80,6 +80,7 @@ int RawValue::compare(const void* v1, const void* v2, const TypeDescriptor& type case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_HLL: + case TYPE_STRING: string_value1 = reinterpret_cast(v1); string_value2 = reinterpret_cast(v2); return string_value1->compare(*string_value2); diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h index a2b18c2b1f..e40bc5b0a1 100644 --- a/be/src/runtime/types.h +++ b/be/src/runtime/types.h @@ -38,7 +38,8 @@ struct TypeDescriptor { PrimitiveType type; /// Only set if type == TYPE_CHAR or type == TYPE_VARCHAR int len; - static const int MAX_VARCHAR_LENGTH = 65355; + static const 
int MAX_VARCHAR_LENGTH = OLAP_VARCHAR_MAX_LENGTH; + static const int MAX_STRING_LENGTH = OLAP_STRING_MAX_LENGTH; static const int MAX_CHAR_LENGTH = 255; static const int MAX_CHAR_INLINE_LENGTH = 128; @@ -96,6 +97,13 @@ struct TypeDescriptor { return ret; } + static TypeDescriptor create_string_type() { + TypeDescriptor ret; + ret.type = TYPE_STRING; + ret.len = MAX_STRING_LENGTH; + return ret; + } + static TypeDescriptor create_hll_type() { TypeDescriptor ret; ret.type = TYPE_HLL; @@ -156,7 +164,7 @@ struct TypeDescriptor { void to_protobuf(PTypeDesc* ptype) const; inline bool is_string_type() const { - return type == TYPE_VARCHAR || type == TYPE_CHAR || type == TYPE_HLL || type == TYPE_OBJECT; + return type == TYPE_VARCHAR || type == TYPE_CHAR || type == TYPE_HLL || type == TYPE_OBJECT || type == TYPE_STRING; } inline bool is_date_type() const { return type == TYPE_DATE || type == TYPE_DATETIME; } @@ -166,7 +174,7 @@ struct TypeDescriptor { inline bool is_datetime_type() const { return type == TYPE_DATETIME; } inline bool is_var_len_string_type() const { - return type == TYPE_VARCHAR || type == TYPE_HLL || type == TYPE_CHAR || type == TYPE_OBJECT; + return type == TYPE_VARCHAR || type == TYPE_HLL || type == TYPE_CHAR || type == TYPE_OBJECT || type == TYPE_STRING; } inline bool is_complex_type() const { @@ -183,6 +191,7 @@ struct TypeDescriptor { case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: return 0; case TYPE_NULL: @@ -221,6 +230,7 @@ struct TypeDescriptor { case TYPE_VARCHAR: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: return sizeof(StringValue); case TYPE_NULL: diff --git a/be/src/runtime/vectorized_row_batch.cpp b/be/src/runtime/vectorized_row_batch.cpp index 8c44dd9c9f..1fcdcd9358 100644 --- a/be/src/runtime/vectorized_row_batch.cpp +++ b/be/src/runtime/vectorized_row_batch.cpp @@ -53,7 +53,7 @@ void VectorizedRowBatch::dump_to_row_block(RowBlock* row_block) { row_block->_mem_buf + 
row_block->_field_offset_in_memory[column_id]; const TabletColumn& column = _schema->column(column_id); size_t field_size = 0; - if (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR || + if (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR || column.type() == OLAP_FIELD_TYPE_STRING || column.type() == OLAP_FIELD_TYPE_HLL || column.type() == OLAP_FIELD_TYPE_OBJECT) { field_size = sizeof(Slice); } else { @@ -96,7 +96,7 @@ void VectorizedRowBatch::dump_to_row_block(RowBlock* row_block) { const TabletColumn& column = _schema->column(column_id); size_t field_size = 0; - if (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR || + if (column.type() == OLAP_FIELD_TYPE_CHAR || column.type() == OLAP_FIELD_TYPE_VARCHAR || column.type() == OLAP_FIELD_TYPE_STRING|| column.type() == OLAP_FIELD_TYPE_HLL || column.type() == OLAP_FIELD_TYPE_OBJECT) { field_size = sizeof(Slice); } else { diff --git a/be/src/service/CMakeLists.txt b/be/src/service/CMakeLists.txt index 91a9f3c508..6cad3e0c42 100644 --- a/be/src/service/CMakeLists.txt +++ b/be/src/service/CMakeLists.txt @@ -29,18 +29,19 @@ add_library(Service internal_service.cpp ) -add_executable(palo_be - doris_main.cpp -) +if (${MAKE_TEST} STREQUAL "OFF") + add_executable(palo_be + doris_main.cpp + ) -# This permits libraries loaded by dlopen to link to the symbols in the program. -set_target_properties(palo_be PROPERTIES ENABLE_EXPORTS 1) + # This permits libraries loaded by dlopen to link to the symbols in the program. 
+ set_target_properties(palo_be PROPERTIES ENABLE_EXPORTS 1) -target_link_libraries(palo_be - ${DORIS_LINK_LIBS} -) + target_link_libraries(palo_be + ${DORIS_LINK_LIBS} + ) -install(DIRECTORY DESTINATION ${OUTPUT_DIR}/lib/) + install(DIRECTORY DESTINATION ${OUTPUT_DIR}/lib/) -install(TARGETS palo_be - DESTINATION ${OUTPUT_DIR}/lib/) + install(TARGETS palo_be DESTINATION ${OUTPUT_DIR}/lib/) +endif() diff --git a/be/src/util/arrow/row_batch.cpp b/be/src/util/arrow/row_batch.cpp index 7728f3d044..dfd5df4a14 100644 --- a/be/src/util/arrow/row_batch.cpp +++ b/be/src/util/arrow/row_batch.cpp @@ -76,6 +76,7 @@ Status convert_to_arrow_type(const TypeDescriptor& type, std::shared_ptrlen == 0) { // 0x01 is a magic num, not useful actually, just for present "" diff --git a/be/src/util/symbols_util.cpp b/be/src/util/symbols_util.cpp index 7461c689fc..a8d6d3c687 100644 --- a/be/src/util/symbols_util.cpp +++ b/be/src/util/symbols_util.cpp @@ -154,6 +154,7 @@ static void append_any_val_type(int namespace_id, const TypeDescriptor& type, case TYPE_CHAR: case TYPE_HLL: case TYPE_OBJECT: + case TYPE_STRING: append_mangled_token("StringVal", s); break; case TYPE_DATE: diff --git a/be/test/olap/column_reader_test.cpp b/be/test/olap/column_reader_test.cpp index dacefbae3a..ee4c84ff7e 100644 --- a/be/test/olap/column_reader_test.cpp +++ b/be/test/olap/column_reader_test.cpp @@ -78,7 +78,7 @@ public: _length_buffers.clear(); } - void create_columnWriter(const TabletSchema& tablet_schema) { + void create_column_writer(const TabletSchema& tablet_schema) { _column_writer = ColumnWriter::create(0, tablet_schema, _stream_factory, 1024, BLOOM_FILTER_DEFAULT_FPP); @@ -86,15 +86,15 @@ public: ASSERT_EQ(_column_writer->init(), OLAP_SUCCESS); } - void create_columnReader(const TabletSchema& tablet_schema) { + void create_column_reader(const TabletSchema& tablet_schema) { UniqueIdEncodingMap encodings; encodings[0] = ColumnEncodingMessage(); encodings[0].set_kind(ColumnEncodingMessage::DIRECT); 
encodings[0].set_dictionary_size(1); - create_columnReader(tablet_schema, encodings); + create_column_reader(tablet_schema, encodings); } - void create_columnReader(const TabletSchema& tablet_schema, UniqueIdEncodingMap& encodings) { + void create_column_reader(const TabletSchema& tablet_schema, UniqueIdEncodingMap& encodings) { UniqueIdToColumnIdMap included; included[0] = 0; UniqueIdToColumnIdMap segment_included; @@ -171,13 +171,15 @@ public: OLAP_SUCCESS); } - void SetTabletSchemaWithOneColumn(std::string name, std::string type, std::string aggregation, - uint32_t length, bool is_allow_null, bool is_key, - TabletSchema* tablet_schema) { + void set_tablet_schema_with_one_column(std::string name, std::string type, + std::string aggregation, uint32_t length, + bool is_allow_null, bool is_key, + TabletSchema* tablet_schema) { TabletSchemaPB tablet_schema_pb; ColumnPB* column = tablet_schema_pb.add_column(); column->set_unique_id(0); column->set_name(name); + column->set_name(name); column->set_type(type); column->set_is_key(is_key); column->set_is_nullable(is_allow_null); @@ -217,9 +219,9 @@ public: TEST_F(TestColumn, VectorizedTinyColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("TinyColumn", "TINYINT", "REPLACE", 1, false, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("TinyColumn", "TINYINT", "REPLACE", 1, false, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -244,7 +246,7 @@ TEST_F(TestColumn, VectorizedTinyColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -264,10 +266,10 @@ TEST_F(TestColumn, VectorizedTinyColumnWithoutPresent) { TEST_F(TestColumn, SeekTinyColumnWithoutPresent) { // write data TabletSchema 
tablet_schema; - SetTabletSchemaWithOneColumn("TinyColumn", "TINYINT", "REPLACE", 1, false, true, - &tablet_schema); + set_tablet_schema_with_one_column("TinyColumn", "TINYINT", "REPLACE", 1, false, true, + &tablet_schema); - create_columnWriter(tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -303,7 +305,7 @@ TEST_F(TestColumn, SeekTinyColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -342,10 +344,10 @@ TEST_F(TestColumn, SeekTinyColumnWithoutPresent) { TEST_F(TestColumn, SkipTinyColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("TinyColumn", "TINYINT", "REPLACE", 1, false, true, - &tablet_schema); + set_tablet_schema_with_one_column("TinyColumn", "TINYINT", "REPLACE", 1, false, true, + &tablet_schema); - create_columnWriter(tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -374,7 +376,7 @@ TEST_F(TestColumn, SkipTinyColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -390,8 +392,9 @@ TEST_F(TestColumn, SkipTinyColumnWithoutPresent) { TEST_F(TestColumn, VectorizedTinyColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("TinyColumn", "TINYINT", "REPLACE", 1, true, true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("TinyColumn", "TINYINT", "REPLACE", 1, true, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -416,7 +419,7 @@ TEST_F(TestColumn, VectorizedTinyColumnWithPresent) { 
ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -434,8 +437,9 @@ TEST_F(TestColumn, VectorizedTinyColumnWithPresent) { TEST_F(TestColumn, TinyColumnIndex) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("TinyColumn", "TINYINT", "REPLACE", 1, true, true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("TinyColumn", "TINYINT", "REPLACE", 1, true, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -460,7 +464,7 @@ TEST_F(TestColumn, TinyColumnIndex) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -478,8 +482,9 @@ TEST_F(TestColumn, TinyColumnIndex) { TEST_F(TestColumn, SeekTinyColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("TinyColumn", "TINYINT", "REPLACE", 1, true, true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("TinyColumn", "TINYINT", "REPLACE", 1, true, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -515,7 +520,7 @@ TEST_F(TestColumn, SeekTinyColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -553,8 +558,9 @@ TEST_F(TestColumn, SeekTinyColumnWithPresent) { TEST_F(TestColumn, SkipTinyColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("TinyColumn", "TINYINT", "REPLACE", 1, true, true, &tablet_schema); - create_columnWriter(tablet_schema); + 
set_tablet_schema_with_one_column("TinyColumn", "TINYINT", "REPLACE", 1, true, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -583,7 +589,7 @@ TEST_F(TestColumn, SkipTinyColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -599,9 +605,9 @@ TEST_F(TestColumn, SkipTinyColumnWithPresent) { TEST_F(TestColumn, VectorizedShortColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("ShortColumn", "SMALLINT", "REPLACE", 2, false, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("ShortColumn", "SMALLINT", "REPLACE", 2, false, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -626,7 +632,7 @@ TEST_F(TestColumn, VectorizedShortColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -644,9 +650,9 @@ TEST_F(TestColumn, VectorizedShortColumnWithoutPresent) { TEST_F(TestColumn, SeekShortColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("ShortColumn", "SMALLINT", "REPLACE", 2, false, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("ShortColumn", "SMALLINT", "REPLACE", 2, false, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -680,7 +686,7 @@ TEST_F(TestColumn, SeekShortColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor 
read_row; read_row.init(tablet_schema); @@ -719,9 +725,9 @@ TEST_F(TestColumn, SeekShortColumnWithoutPresent) { TEST_F(TestColumn, SkipShortColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("ShortColumn", "SMALLINT", "REPLACE", 2, false, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("ShortColumn", "SMALLINT", "REPLACE", 2, false, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -750,7 +756,7 @@ TEST_F(TestColumn, SkipShortColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -766,9 +772,9 @@ TEST_F(TestColumn, SkipShortColumnWithoutPresent) { TEST_F(TestColumn, SeekShortColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("ShortColumn", "SMALLINT", "REPLACE", 2, true, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("ShortColumn", "SMALLINT", "REPLACE", 2, true, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -802,7 +808,7 @@ TEST_F(TestColumn, SeekShortColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -839,9 +845,9 @@ TEST_F(TestColumn, VectorizedShortColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("ShortColumn", "SMALLINT", "REPLACE", 2, true, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("ShortColumn", "SMALLINT", "REPLACE", 2, true, true, + &tablet_schema); + create_column_writer(tablet_schema); 
RowCursor write_row; write_row.init(tablet_schema); @@ -866,7 +872,7 @@ TEST_F(TestColumn, VectorizedShortColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -886,9 +892,9 @@ TEST_F(TestColumn, VectorizedShortColumnWithPresent) { TEST_F(TestColumn, SkipShortColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("ShortColumn", "SMALLINT", "REPLACE", 2, true, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("ShortColumn", "SMALLINT", "REPLACE", 2, true, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -916,7 +922,7 @@ TEST_F(TestColumn, SkipShortColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -932,8 +938,9 @@ TEST_F(TestColumn, SkipShortColumnWithPresent) { TEST_F(TestColumn, VectorizedIntColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("IntColumn", "INT", "REPLACE", 4, false, true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("IntColumn", "INT", "REPLACE", 4, false, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -958,7 +965,7 @@ TEST_F(TestColumn, VectorizedIntColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -976,8 +983,9 @@ TEST_F(TestColumn, VectorizedIntColumnWithoutPresent) { TEST_F(TestColumn, 
VectorizedIntColumnMassWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("IntColumn", "INT", "REPLACE", 4, false, true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("IntColumn", "INT", "REPLACE", 4, false, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -998,7 +1006,7 @@ TEST_F(TestColumn, VectorizedIntColumnMassWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1023,8 +1031,8 @@ TEST_F(TestColumn, VectorizedIntColumnMassWithoutPresent) { TEST_F(TestColumn, VectorizedIntColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("IntColumn", "INT", "REPLACE", 4, true, true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("IntColumn", "INT", "REPLACE", 4, true, true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1049,7 +1057,7 @@ TEST_F(TestColumn, VectorizedIntColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1070,9 +1078,9 @@ TEST_F(TestColumn, VectorizedIntColumnWithPresent) { TEST_F(TestColumn, VectorizedLongColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("LongColumnWithoutPresent", "BIGINT", "REPLACE", 8, false, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("LongColumnWithoutPresent", "BIGINT", "REPLACE", 8, false, + true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; 
write_row.init(tablet_schema); @@ -1098,7 +1106,7 @@ TEST_F(TestColumn, VectorizedLongColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1116,9 +1124,9 @@ TEST_F(TestColumn, VectorizedLongColumnWithoutPresent) { TEST_F(TestColumn, VectorizedLongColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("LongColumnWithPresent", "BIGINT", "REPLACE", 8, true, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("LongColumnWithPresent", "BIGINT", "REPLACE", 8, true, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1144,7 +1152,7 @@ TEST_F(TestColumn, VectorizedLongColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1164,9 +1172,9 @@ TEST_F(TestColumn, VectorizedLongColumnWithPresent) { TEST_F(TestColumn, VectorizedFloatColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("FloatColumnWithoutPresent", "FLOAT", "REPLACE", 4, false, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("FloatColumnWithoutPresent", "FLOAT", "REPLACE", 4, false, + true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1192,7 +1200,7 @@ TEST_F(TestColumn, VectorizedFloatColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1212,9 +1220,9 @@ TEST_F(TestColumn, 
VectorizedFloatColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("FloatColumnWithPresent", "FLOAT", "REPLACE", 4, true, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("FloatColumnWithPresent", "FLOAT", "REPLACE", 4, true, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1240,7 +1248,7 @@ TEST_F(TestColumn, VectorizedFloatColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1261,9 +1269,9 @@ TEST_F(TestColumn, SeekFloatColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("FloatColumnWithPresent", "FLOAT", "REPLACE", 4, true, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("FloatColumnWithPresent", "FLOAT", "REPLACE", 4, true, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1293,7 +1301,7 @@ TEST_F(TestColumn, SeekFloatColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1326,9 +1334,9 @@ TEST_F(TestColumn, SkipFloatColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("FloatColumnWithPresent", "FLOAT", "REPLACE", 4, true, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("FloatColumnWithPresent", "FLOAT", "REPLACE", 4, true, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1354,7 +1362,7 @@ TEST_F(TestColumn, SkipFloatColumnWithPresent) { 
ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1371,9 +1379,9 @@ TEST_F(TestColumn, VectorizedDoubleColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DoubleColumnWithoutPresent", "DOUBLE", "REPLACE", 8, false, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DoubleColumnWithoutPresent", "DOUBLE", "REPLACE", 8, false, + true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1399,7 +1407,7 @@ TEST_F(TestColumn, VectorizedDoubleColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1419,9 +1427,9 @@ TEST_F(TestColumn, VectorizedDoubleColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DoubleColumnWithPresent", "DOUBLE", "REPLACE", 8, true, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DoubleColumnWithPresent", "DOUBLE", "REPLACE", 8, true, true, + &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1447,7 +1455,7 @@ TEST_F(TestColumn, VectorizedDoubleColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1469,9 +1477,9 @@ TEST_F(TestColumn, VectorizedDatetimeColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DatetimeColumnWithoutPresent", "DATETIME", "REPLACE", 8, false, - true, &tablet_schema); - 
create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DatetimeColumnWithoutPresent", "DATETIME", "REPLACE", 8, + false, true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1493,7 +1501,7 @@ TEST_F(TestColumn, VectorizedDatetimeColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1510,9 +1518,9 @@ TEST_F(TestColumn, VectorizedDatetimeColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DatetimeColumnWithoutPresent", "DATETIME", "REPLACE", 8, true, - true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DatetimeColumnWithoutPresent", "DATETIME", "REPLACE", 8, + true, true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1540,7 +1548,7 @@ TEST_F(TestColumn, VectorizedDatetimeColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1564,9 +1572,9 @@ TEST_F(TestColumn, VectorizedDatetimeColumnZero) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DatetimeColumnWithoutPresent", "DATETIME", "REPLACE", 8, true, - true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DatetimeColumnWithoutPresent", "DATETIME", "REPLACE", 8, + true, true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1594,7 +1602,7 @@ TEST_F(TestColumn, VectorizedDatetimeColumnZero) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + 
create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1620,9 +1628,9 @@ TEST_F(TestColumn, VectorizedDateColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DateColumnWithoutoutPresent", "DATE", "REPLACE", 3, false, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DateColumnWithoutoutPresent", "DATE", "REPLACE", 3, false, + true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1644,7 +1652,7 @@ TEST_F(TestColumn, VectorizedDateColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1660,9 +1668,9 @@ TEST_F(TestColumn, VectorizedDateColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DateColumnWithoutoutPresent", "DATE", "REPLACE", 3, true, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DateColumnWithoutoutPresent", "DATE", "REPLACE", 3, true, + true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1692,7 +1700,7 @@ TEST_F(TestColumn, VectorizedDateColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1716,9 +1724,9 @@ TEST_F(TestColumn, VectorizedDecimalColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DecimalColumnWithoutoutPresent", "DECIMAL", "REPLACE", 12, false, - true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DecimalColumnWithoutoutPresent", "DECIMAL", "REPLACE", 12, + 
false, true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1748,7 +1756,7 @@ TEST_F(TestColumn, VectorizedDecimalColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1768,9 +1776,9 @@ TEST_F(TestColumn, VectorizedDecimalColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DecimalColumnWithoutoutPresent", "DECIMAL", "REPLACE", 12, true, - true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DecimalColumnWithoutoutPresent", "DECIMAL", "REPLACE", 12, + true, true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1799,7 +1807,7 @@ TEST_F(TestColumn, VectorizedDecimalColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1820,9 +1828,9 @@ TEST_F(TestColumn, SkipDecimalColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DecimalColumnWithPresent", "DECIMAL", "REPLACE", 12, true, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DecimalColumnWithPresent", "DECIMAL", "REPLACE", 12, true, + true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1850,7 +1858,7 @@ TEST_F(TestColumn, SkipDecimalColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1868,9 +1876,9 @@ TEST_F(TestColumn, 
SkipDecimalColumnWithPresent) { TEST_F(TestColumn, SeekDecimalColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DecimalColumnWithPresent", "DECIMAL", "REPLACE", 12, true, true, - &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DecimalColumnWithPresent", "DECIMAL", "REPLACE", 12, true, + true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1904,7 +1912,7 @@ TEST_F(TestColumn, SeekDecimalColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1943,14 +1951,14 @@ TEST_F(TestColumn, VectorizedLargeIntColumnWithoutPresent) { // init tablet schema TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("LargeIntColumnWithoutoutPresent", "LARGEINT", "SUM", 16, false, - true, &tablet_schema); + set_tablet_schema_with_one_column("LargeIntColumnWithoutoutPresent", "LARGEINT", "SUM", 16, + false, true, &tablet_schema); // test data string value1 = "100000000000000000000000000000000000000"; string value2 = "-170141183460469231731687303715884105728"; // write data - create_columnWriter(tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -1979,7 +1987,7 @@ TEST_F(TestColumn, VectorizedLargeIntColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -1999,15 +2007,15 @@ TEST_F(TestColumn, VectorizedLargeIntColumnWithPresent) { // init tablet schema TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("LargeIntColumnWithoutoutPresent", "LARGEINT", "SUM", 16, true, - true, &tablet_schema); + 
set_tablet_schema_with_one_column("LargeIntColumnWithoutoutPresent", "LARGEINT", "SUM", 16, + true, true, &tablet_schema); // test data string value1 = "100000000000000000000000000000000000000"; string value2 = "-170141183460469231731687303715884105728"; // write data - create_columnWriter(tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -2043,7 +2051,7 @@ TEST_F(TestColumn, VectorizedLargeIntColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -2071,14 +2079,14 @@ TEST_F(TestColumn, SkipLargeIntColumnWithPresent) { // init tablet schema TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("LargeIntColumnWithPresent", "LARGEINT", "SUM", 16, true, true, - &tablet_schema); + set_tablet_schema_with_one_column("LargeIntColumnWithPresent", "LARGEINT", "SUM", 16, true, + true, &tablet_schema); // test data string value1 = "100000000000000000000000000000000000000"; string value2 = "-170141183460469231731687303715884105728"; // write data - create_columnWriter(tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -2107,7 +2115,7 @@ TEST_F(TestColumn, SkipLargeIntColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -2124,10 +2132,10 @@ TEST_F(TestColumn, VectorizedDirectVarcharColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DirectVarcharColumnWithoutoutPresent", "VARCHAR", "REPLACE", 10, - false, true, &tablet_schema); + set_tablet_schema_with_one_column("DirectVarcharColumnWithoutoutPresent", "VARCHAR", "REPLACE", + 10, false, true, &tablet_schema); - 
create_columnWriter(tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -2164,7 +2172,7 @@ TEST_F(TestColumn, VectorizedDirectVarcharColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -2189,9 +2197,9 @@ TEST_F(TestColumn, VectorizedDirectVarcharColumnWithoutPresent) { TEST_F(TestColumn, VectorizedDirectVarcharColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DirectVarcharColumnWithoutoutPresent", "VARCHAR", "REPLACE", 10, - true, true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DirectVarcharColumnWithoutoutPresent", "VARCHAR", "REPLACE", + 10, true, true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -2220,7 +2228,7 @@ TEST_F(TestColumn, VectorizedDirectVarcharColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -2241,9 +2249,9 @@ TEST_F(TestColumn, SkipDirectVarcharColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DirectVarcharColumnWithPresent", "VARCHAR", "REPLACE", 10, true, - true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DirectVarcharColumnWithPresent", "VARCHAR", "REPLACE", 10, + true, true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -2274,7 +2282,7 @@ TEST_F(TestColumn, SkipDirectVarcharColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + 
create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -2293,9 +2301,9 @@ TEST_F(TestColumn, SeekDirectVarcharColumnWithoutPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DirectVarcharColumnWithPresent", "VARCHAR", "REPLACE", 10, false, - true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DirectVarcharColumnWithPresent", "VARCHAR", "REPLACE", 10, + false, true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -2330,7 +2338,7 @@ TEST_F(TestColumn, SeekDirectVarcharColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -2366,9 +2374,9 @@ TEST_F(TestColumn, SeekDirectVarcharColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DirectVarcharColumnWithPresent", "VARCHAR", "REPLACE", 10, true, - true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DirectVarcharColumnWithPresent", "VARCHAR", "REPLACE", 10, + true, true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -2403,7 +2411,7 @@ TEST_F(TestColumn, SeekDirectVarcharColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -2435,12 +2443,12 @@ TEST_F(TestColumn, SeekDirectVarcharColumnWithPresent) { ASSERT_TRUE(strncmp(value->data, "YWFhYWE=", value->size) == 0); } -TEST_F(TestColumn, VectorizedStringColumnWithoutPresent) { +TEST_F(TestColumn, VectorizedCharColumnWithoutPresent) { // write data TabletSchema tablet_schema; - 
SetTabletSchemaWithOneColumn("VarcharColumnWithoutoutPresent", "CHAR", "REPLACE", - strlen("abcde"), false, true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("CharColumnWithoutoutPresent", "CHAR", "REPLACE", + strlen("abcde"), false, true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -2477,7 +2485,7 @@ TEST_F(TestColumn, VectorizedStringColumnWithoutPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -2499,12 +2507,12 @@ TEST_F(TestColumn, VectorizedStringColumnWithoutPresent) { ASSERT_NE(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS); } -TEST_F(TestColumn, VectorizedStringColumnWithPresent) { +TEST_F(TestColumn, VectorizedCharColumnWithPresent) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("VarcharColumnWithoutoutPresent", "CHAR", "REPLACE", - strlen("abcde"), true, true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("CharColumnWithoutoutPresent", "CHAR", "REPLACE", + strlen("abcde"), true, true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -2531,7 +2539,7 @@ TEST_F(TestColumn, VectorizedStringColumnWithPresent) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -2548,12 +2556,12 @@ TEST_F(TestColumn, VectorizedStringColumnWithPresent) { ASSERT_TRUE(strncmp(value->data, "abcde", value->size) == 0); } -TEST_F(TestColumn, VectorizedStringColumnWithoutoutPresent2) { +TEST_F(TestColumn, VectorizedCharColumnWithoutoutPresent2) { // write data TabletSchema tablet_schema; - 
SetTabletSchemaWithOneColumn("VarcharColumnWithoutoutPresent", "CHAR", "REPLACE", 20, false, - true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("CharColumnWithoutoutPresent", "CHAR", "REPLACE", 20, false, + true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -2608,7 +2616,205 @@ TEST_F(TestColumn, VectorizedStringColumnWithoutoutPresent2) { ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); + + RowCursor read_row; + read_row.init(tablet_schema); + read_row.allocate_memory_for_string_type(tablet_schema); + + _col_vector.reset(new ColumnVector()); + ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 5, _mem_pool.get()), OLAP_SUCCESS); + Slice* value = reinterpret_cast(_col_vector->col_data()); + + ASSERT_TRUE(strncmp(value->data, "abcde", value->size) == 0); + + value++; + ASSERT_TRUE(strncmp(value->data, "aaaaa", value->size) == 0); + + value++; + ASSERT_TRUE(strncmp(value->data, "bbbbb", value->size) == 0); + + value++; + ASSERT_TRUE(strncmp(value->data, "ccccc", value->size) == 0); + + value++; + ASSERT_TRUE(strncmp(value->data, "ddddd", value->size) == 0); +} + +TEST_F(TestColumn, VectorizedStringColumnWithoutPresent) { + // write data + TabletSchema tablet_schema; + set_tablet_schema_with_one_column("StringColumnWithoutoutPresent", "STRING", "REPLACE", 0, + false, true, &tablet_schema); + create_column_writer(tablet_schema); + + RowCursor write_row; + write_row.init(tablet_schema); + write_row.allocate_memory_for_string_type(tablet_schema); + + RowBlock block(&tablet_schema); + RowBlockInfo block_info; + block_info.row_num = 10000; + block.init(block_info); + + std::vector val_string_array; + val_string_array.push_back("abcde"); //"abcde" base_64_encode is "YWJjZGU=" + OlapTuple tuple1(val_string_array); + write_row.from_tuple(tuple1); + 
block.set_row(0, write_row); + block.finalize(1); + ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); + for (uint32_t i = 0; i < 2; i++) { + block.set_row(0, write_row); + block.finalize(1); + ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); + } + val_string_array.clear(); + val_string_array.push_back("edcba"); //"edcba" base_64_encode is "ZWRjYmE=" + OlapTuple tuple2(val_string_array); + write_row.from_tuple(tuple2); + for (uint32_t i = 0; i < 2; i++) { + block.set_row(0, write_row); + block.finalize(1); + ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); + } + + ColumnDataHeaderMessage header; + ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); + + // read data + create_column_reader(tablet_schema); + + RowCursor read_row; + read_row.init(tablet_schema); + read_row.allocate_memory_for_string_type(tablet_schema); + + _col_vector.reset(new ColumnVector()); + ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 5, _mem_pool.get()), OLAP_SUCCESS); + Slice* value = reinterpret_cast(_col_vector->col_data()); + + ASSERT_TRUE(strncmp(value->data, "abcde", value->size) == 0); + for (uint32_t i = 0; i < 2; i++) { + value++; + ASSERT_TRUE(strncmp(value->data, "abcde", value->size) == 0); + } + for (uint32_t i = 0; i < 2; i++) { + value++; + ASSERT_TRUE(strncmp(value->data, "edcba", value->size) == 0); + } + ASSERT_NE(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS); +} + +TEST_F(TestColumn, VectorizedStringColumnWithPresent) { + // write data + TabletSchema tablet_schema; + set_tablet_schema_with_one_column("StringColumnWithoutoutPresent", "STRING", "REPLACE", 0, true, + true, &tablet_schema); + create_column_writer(tablet_schema); + + RowCursor write_row; + write_row.init(tablet_schema); + write_row.allocate_memory_for_string_type(tablet_schema); + + RowBlock block(&tablet_schema); + RowBlockInfo block_info; + block_info.row_num = 10000; + 
block.init(block_info); + + write_row.set_null(0); + block.set_row(0, write_row); + + std::vector val_string_array; + val_string_array.push_back("abcde"); //"abcde" base_64_encode is "YWJjZGU=" + OlapTuple tuple(val_string_array); + write_row.from_tuple(tuple); + write_row.set_not_null(0); + block.set_row(1, write_row); + block.finalize(2); + ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); + + ColumnDataHeaderMessage header; + ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); + + // read data + create_column_reader(tablet_schema); + + RowCursor read_row; + read_row.init(tablet_schema); + read_row.allocate_memory_for_string_type(tablet_schema); + + _col_vector.reset(new ColumnVector()); + ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 2, _mem_pool.get()), OLAP_SUCCESS); + bool* is_null = _col_vector->is_null(); + ASSERT_EQ(is_null[0], true); + ASSERT_EQ(is_null[1], false); + + Slice* value = reinterpret_cast(_col_vector->col_data()); + value++; + ASSERT_TRUE(strncmp(value->data, "abcde", value->size) == 0); +} + +TEST_F(TestColumn, VectorizedStringColumnWithoutoutPresent2) { + // write data + TabletSchema tablet_schema; + set_tablet_schema_with_one_column("StringColumnWithoutoutPresent", "STRING", "REPLACE", 0, + false, true, &tablet_schema); + create_column_writer(tablet_schema); + + RowCursor write_row; + write_row.init(tablet_schema); + write_row.allocate_memory_for_string_type(tablet_schema); + + RowBlock block(&tablet_schema); + RowBlockInfo block_info; + block_info.row_num = 10000; + block.init(block_info); + + std::vector val_string_array; + val_string_array.push_back("abcde"); //"abcde" base_64_encode is "YWJjZGU=" + OlapTuple tuple1(val_string_array); + write_row.from_tuple(tuple1); + block.set_row(0, write_row); + block.finalize(1); + ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); + + val_string_array.clear(); + val_string_array.push_back("aaaaa"); //"abcde" base_64_encode is 
"YWJjZGU=" + OlapTuple tuple2(val_string_array); + write_row.from_tuple(tuple2); + block.set_row(0, write_row); + block.finalize(1); + ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); + + val_string_array.clear(); + val_string_array.push_back("bbbbb"); //"abcde" base_64_encode is "YWJjZGU=" + OlapTuple tuple3(val_string_array); + write_row.from_tuple(tuple3); + block.set_row(0, write_row); + block.finalize(1); + ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); + + val_string_array.clear(); + val_string_array.push_back("ccccc"); //"abcde" base_64_encode is "YWJjZGU=" + OlapTuple tuple4(val_string_array); + write_row.from_tuple(tuple4); + block.set_row(0, write_row); + block.finalize(1); + ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); + + val_string_array.clear(); + val_string_array.push_back("ddddd"); //"abcde" base_64_encode is "YWJjZGU=" + OlapTuple tuple5(val_string_array); + write_row.from_tuple(tuple5); + block.set_row(0, write_row); + block.finalize(1); + ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); + + ColumnDataHeaderMessage header; + ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); + + // read data + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); @@ -2636,9 +2842,9 @@ TEST_F(TestColumn, VectorizedStringColumnWithoutoutPresent2) { TEST_F(TestColumn, VectorizedDirectVarcharColumnWith65533) { // write data TabletSchema tablet_schema; - SetTabletSchemaWithOneColumn("DirectVarcharColumnWithoutoutPresent", "VARCHAR", "REPLACE", - 65535, false, true, &tablet_schema); - create_columnWriter(tablet_schema); + set_tablet_schema_with_one_column("DirectVarcharColumnWithoutoutPresent", "VARCHAR", "REPLACE", + 65535, false, true, &tablet_schema); + create_column_writer(tablet_schema); RowCursor write_row; write_row.init(tablet_schema); @@ -2671,7 +2877,7 @@ TEST_F(TestColumn, VectorizedDirectVarcharColumnWith65533) { 
ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); // read data - create_columnReader(tablet_schema); + create_column_reader(tablet_schema); RowCursor read_row; read_row.init(tablet_schema); diff --git a/be/test/olap/row_cursor_test.cpp b/be/test/olap/row_cursor_test.cpp index 34410b9613..3a4ade433c 100644 --- a/be/test/olap/row_cursor_test.cpp +++ b/be/test/olap/row_cursor_test.cpp @@ -112,7 +112,7 @@ void set_tablet_schema_for_init(TabletSchema* tablet_schema) { column_9->set_name("column_9"); column_9->set_type("VARCHAR"); column_9->set_is_nullable(true); - column_9->set_length(16 + OLAP_STRING_MAX_BYTES); + column_9->set_length(16 + OLAP_VARCHAR_MAX_BYTES); column_9->set_aggregation("REPLACE"); column_9->set_is_key(false); @@ -165,7 +165,7 @@ void set_tablet_schema_for_scan_key(TabletSchema* tablet_schema) { column_2->set_type("VARCHAR"); column_2->set_is_key(true); column_2->set_is_nullable(true); - column_2->set_length(16 + OLAP_STRING_MAX_BYTES); + column_2->set_length(16 + OLAP_VARCHAR_MAX_BYTES); column_2->set_index_length(20); ColumnPB* column_3 = tablet_schema_pb.add_column(); @@ -243,7 +243,7 @@ void set_tablet_schema_for_cmp_and_aggregate(TabletSchema* tablet_schema) { column_6->set_name("column_6"); column_6->set_type("VARCHAR"); column_6->set_is_nullable(true); - column_6->set_length(16 + OLAP_STRING_MAX_BYTES); + column_6->set_length(16 + OLAP_VARCHAR_MAX_BYTES); column_6->set_aggregation("REPLACE"); column_6->set_is_key(false); diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js index cf8012e7b2..3d99e1d970 100644 --- a/docs/.vuepress/sidebar/en.js +++ b/docs/.vuepress/sidebar/en.js @@ -581,6 +581,7 @@ module.exports = [ "HLL", "INT", "SMALLINT", + "STRING", "TINYINT", "VARCHAR", ], diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js index c4fd9b7035..0e0ddae690 100644 --- a/docs/.vuepress/sidebar/zh-CN.js +++ b/docs/.vuepress/sidebar/zh-CN.js @@ -585,6 +585,7 @@ module.exports = [ "INT", 
"LARGEINT", "SMALLINT", + "STRING", "TINYINT", "VARCHAR", ], diff --git a/docs/en/sql-reference/sql-statements/Data Types/STRING.md b/docs/en/sql-reference/sql-statements/Data Types/STRING.md new file mode 100644 index 0000000000..654a7b9d3e --- /dev/null +++ b/docs/en/sql-reference/sql-statements/Data Types/STRING.md @@ -0,0 +1,35 @@ +--- +{ + "title": "STRING", + "language": "en" +} +--- + + + +# STRING +## Description +STRING +A variable length string, max length is 2147483643 (2GB - 4). + +Note: Variable length strings are stored in UTF-8 encoding, so usually English characters occupies 1 byte, and Chinese characters occupies 3 bytes. + +## keyword +STRING diff --git a/docs/en/sql-reference/sql-statements/Data Types/VARCHAR.md b/docs/en/sql-reference/sql-statements/Data Types/VARCHAR.md index ae7fa8599d..3d55680ae8 100644 --- a/docs/en/sql-reference/sql-statements/Data Types/VARCHAR.md +++ b/docs/en/sql-reference/sql-statements/Data Types/VARCHAR.md @@ -26,7 +26,7 @@ under the License. # VARCHAR ## Description -MARKETING (M) +VARCHAR(M) A variable length string, M represents the length of a variable length string. The range of M is 1-65533. Note: Variable length strings are stored in UTF-8 encoding, so usually English characters occupies 1 byte, and Chinese characters occupies 3 bytes. 
diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/STRING.md b/docs/zh-CN/sql-reference/sql-statements/Data Types/STRING.md new file mode 100644 index 0000000000..7b80e79554 --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-statements/Data Types/STRING.md @@ -0,0 +1,36 @@ +--- +{ + "title": "STRING", + "language": "zh-CN" +} +--- + + + +# STRING +## description + STRING + 变长字符串,最大支持2147483643 字节(2GB-4)。用法类似VARCHAR。 + + 注意:变长字符串是以UTF-8编码存储的,因此通常英文字符占1个字节,中文字符占3个字节。 + +## keyword + + STRING diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 19195aee0b..e60c6c165c 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -10,7 +10,7 @@ // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
@@ -4311,7 +4311,7 @@ type ::= | KW_BITMAP {: RESULT = Type.BITMAP; :} | KW_STRING - {: RESULT = ScalarType.createVarcharType(-1); :} + {: RESULT = ScalarType.createStringType(); :} | KW_VARCHAR LPAREN INTEGER_LITERAL:len RPAREN {: ScalarType type = ScalarType.createVarcharType(len.intValue()); type.setAssignedStrLenInColDefinition(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/AlterOpType.java b/fe/fe-core/src/main/java/org/apache/doris/alter/AlterOpType.java index ff0710bb80..7f8ddac105 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/AlterOpType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/AlterOpType.java @@ -33,7 +33,7 @@ public enum AlterOpType { // table property MODIFY_TABLE_PROPERTY, MODIFY_TABLE_PROPERTY_SYNC, // Some operations are performed synchronously, so we distinguish them by suffix _SYNC - // others operation, such as add/drop backend. currently we do not care about them + // others operation, such as add/drop backend. currently, we do not care about them ALTER_OTHER, ENABLE_FEATURE, REPLACE_TABLE, diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java index 6d51339f4f..274bd03077 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java @@ -303,6 +303,11 @@ public class BinaryPredicate extends Predicate implements Writable { if (t1 == PrimitiveType.VARCHAR && t2 == PrimitiveType.VARCHAR) { return Type.VARCHAR; } + if (t1 == PrimitiveType.STRING && t2 == PrimitiveType.STRING + || t1 == PrimitiveType.STRING && t2 == PrimitiveType.VARCHAR + || t1 == PrimitiveType.VARCHAR && t2 == PrimitiveType.STRING) { + return Type.STRING; + } if (t1 == PrimitiveType.BIGINT && t2 == PrimitiveType.BIGINT) { return Type.getAssignmentCompatibleType(getChild(0).getType(), getChild(1).getType(), false); } @@ -322,14 
+327,14 @@ public class BinaryPredicate extends Predicate implements Writable { // When int column compares with string, Mysql will convert string to int. // So it is also compatible with Mysql. - if (t1 == PrimitiveType.BIGINT && t2 == PrimitiveType.VARCHAR) { + if (t1 == PrimitiveType.BIGINT && (t2 == PrimitiveType.VARCHAR || t2 ==PrimitiveType.STRING)) { Expr rightChild = getChild(1); Long parsedLong = Type.tryParseToLong(rightChild); if(parsedLong != null) { return Type.BIGINT; } } - if (t1 == PrimitiveType.VARCHAR && t2 == PrimitiveType.BIGINT) { + if ((t1 == PrimitiveType.VARCHAR || t1 ==PrimitiveType.STRING) && t2 == PrimitiveType.BIGINT) { Expr leftChild = getChild(0); Long parsedLong = Type.tryParseToLong(leftChild); if(parsedLong != null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java index 97f31840bf..71cf4f6372 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java @@ -279,6 +279,7 @@ public class ColumnDef { case CHAR: case VARCHAR: case HLL: + case STRING: if (defaultValue.length() > scalarType.getLength()) { throw new AnalysisException("Default value is too long: " + defaultValue); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java index 2a5acd6f55..ef26417747 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java @@ -232,9 +232,10 @@ public class OutFileClause { break; case CHAR: case VARCHAR: + case STRING: case DECIMALV2: if (!type.equals("byte_array")) { - throw new AnalysisException("project field type is CHAR/VARCHAR/DECIMAL, should use byte_array, " + + throw new AnalysisException("project field type is CHAR/VARCHAR/STRING/DECIMAL, should use 
byte_array, " + "but the definition type of column " + i + " is " + type); } break; @@ -272,6 +273,7 @@ public class OutFileClause { break; case CHAR: case VARCHAR: + case STRING: case DECIMALV2: column.add("byte_array"); break; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java index 1f3ae733fc..6bce9b5ff1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java @@ -131,10 +131,10 @@ public class TypeDef implements ParseNode { String name; int maxLen; if (type == PrimitiveType.VARCHAR) { - name = "Varchar"; + name = "VARCHAR"; maxLen = ScalarType.MAX_VARCHAR_LENGTH; } else if (type == PrimitiveType.CHAR) { - name = "Char"; + name = "CHAR"; maxLen = ScalarType.MAX_CHAR_LENGTH; } else { Preconditions.checkState(false); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java index 7aae66092c..67885ce2db 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java @@ -49,6 +49,7 @@ public abstract class ColumnType { schemaChangeMatrix[PrimitiveType.TINYINT.ordinal()][PrimitiveType.FLOAT.ordinal()] = true; schemaChangeMatrix[PrimitiveType.TINYINT.ordinal()][PrimitiveType.DOUBLE.ordinal()] = true; schemaChangeMatrix[PrimitiveType.TINYINT.ordinal()][PrimitiveType.VARCHAR.ordinal()] = true; + schemaChangeMatrix[PrimitiveType.TINYINT.ordinal()][PrimitiveType.STRING.ordinal()] = true; schemaChangeMatrix[PrimitiveType.SMALLINT.ordinal()][PrimitiveType.INT.ordinal()] = true; schemaChangeMatrix[PrimitiveType.SMALLINT.ordinal()][PrimitiveType.BIGINT.ordinal()] = true; @@ -56,6 +57,7 @@ public abstract class ColumnType { schemaChangeMatrix[PrimitiveType.SMALLINT.ordinal()][PrimitiveType.FLOAT.ordinal()] = true; 
schemaChangeMatrix[PrimitiveType.SMALLINT.ordinal()][PrimitiveType.DOUBLE.ordinal()] = true; schemaChangeMatrix[PrimitiveType.SMALLINT.ordinal()][PrimitiveType.VARCHAR.ordinal()] = true; + schemaChangeMatrix[PrimitiveType.SMALLINT.ordinal()][PrimitiveType.STRING.ordinal()] = true; schemaChangeMatrix[PrimitiveType.INT.ordinal()][PrimitiveType.BIGINT.ordinal()] = true; schemaChangeMatrix[PrimitiveType.INT.ordinal()][PrimitiveType.LARGEINT.ordinal()] = true; @@ -63,23 +65,29 @@ public abstract class ColumnType { schemaChangeMatrix[PrimitiveType.INT.ordinal()][PrimitiveType.DOUBLE.ordinal()] = true; schemaChangeMatrix[PrimitiveType.INT.ordinal()][PrimitiveType.DATE.ordinal()] = true; schemaChangeMatrix[PrimitiveType.INT.ordinal()][PrimitiveType.VARCHAR.ordinal()] = true; + schemaChangeMatrix[PrimitiveType.INT.ordinal()][PrimitiveType.STRING.ordinal()] = true; schemaChangeMatrix[PrimitiveType.BIGINT.ordinal()][PrimitiveType.LARGEINT.ordinal()] = true; schemaChangeMatrix[PrimitiveType.BIGINT.ordinal()][PrimitiveType.FLOAT.ordinal()] = true; schemaChangeMatrix[PrimitiveType.BIGINT.ordinal()][PrimitiveType.DOUBLE.ordinal()] = true; schemaChangeMatrix[PrimitiveType.BIGINT.ordinal()][PrimitiveType.VARCHAR.ordinal()] = true; + schemaChangeMatrix[PrimitiveType.BIGINT.ordinal()][PrimitiveType.STRING.ordinal()] = true; schemaChangeMatrix[PrimitiveType.LARGEINT.ordinal()][PrimitiveType.FLOAT.ordinal()] = true; schemaChangeMatrix[PrimitiveType.LARGEINT.ordinal()][PrimitiveType.DOUBLE.ordinal()] = true; schemaChangeMatrix[PrimitiveType.LARGEINT.ordinal()][PrimitiveType.VARCHAR.ordinal()] = true; + schemaChangeMatrix[PrimitiveType.LARGEINT.ordinal()][PrimitiveType.STRING.ordinal()] = true; schemaChangeMatrix[PrimitiveType.FLOAT.ordinal()][PrimitiveType.DOUBLE.ordinal()] = true; schemaChangeMatrix[PrimitiveType.FLOAT.ordinal()][PrimitiveType.VARCHAR.ordinal()] = true; + schemaChangeMatrix[PrimitiveType.FLOAT.ordinal()][PrimitiveType.STRING.ordinal()] = true; 
schemaChangeMatrix[PrimitiveType.DOUBLE.ordinal()][PrimitiveType.VARCHAR.ordinal()] = true; + schemaChangeMatrix[PrimitiveType.DOUBLE.ordinal()][PrimitiveType.STRING.ordinal()] = true; schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.VARCHAR.ordinal()] = true; schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.CHAR.ordinal()] = true; + schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.STRING.ordinal()] = true; schemaChangeMatrix[PrimitiveType.VARCHAR.ordinal()][PrimitiveType.TINYINT.ordinal()] = true; schemaChangeMatrix[PrimitiveType.VARCHAR.ordinal()][PrimitiveType.SMALLINT.ordinal()] = true; @@ -89,6 +97,7 @@ public abstract class ColumnType { schemaChangeMatrix[PrimitiveType.VARCHAR.ordinal()][PrimitiveType.FLOAT.ordinal()] = true; schemaChangeMatrix[PrimitiveType.VARCHAR.ordinal()][PrimitiveType.DOUBLE.ordinal()] = true; schemaChangeMatrix[PrimitiveType.VARCHAR.ordinal()][PrimitiveType.DATE.ordinal()] = true; + schemaChangeMatrix[PrimitiveType.VARCHAR.ordinal()][PrimitiveType.STRING.ordinal()] = true; schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.TINYINT.ordinal()] = true; schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.SMALLINT.ordinal()] = true; @@ -98,6 +107,7 @@ public abstract class ColumnType { schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.FLOAT.ordinal()] = true; schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.DOUBLE.ordinal()] = true; schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.DATE.ordinal()] = true; + schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.STRING.ordinal()] = true; schemaChangeMatrix[PrimitiveType.DECIMALV2.ordinal()][PrimitiveType.VARCHAR.ordinal()] = true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java index 122f061307..0b11668414 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java @@ -491,6 +491,7 @@ public class Function implements Writable { case CHAR: case HLL: case BITMAP: + case STRING: return "string_val"; case DATE: case DATETIME: @@ -528,6 +529,7 @@ public class Function implements Writable { case CHAR: case HLL: case BITMAP: + case STRING: return "StringVal"; case DATE: case DATETIME: diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java index 761347ae11..a223ab665d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java @@ -57,6 +57,7 @@ public enum PrimitiveType { ARRAY("ARRAY", 24, TPrimitiveType.ARRAY), MAP("MAP", 24, TPrimitiveType.MAP), STRUCT("MAP", 24, TPrimitiveType.STRUCT), + STRING("STRING", 16, TPrimitiveType.STRING), // Unsupported scalar types. BINARY("BINARY", -1, TPrimitiveType.BINARY); @@ -64,6 +65,7 @@ public enum PrimitiveType { private static final int DATE_INDEX_LEN = 3; private static final int DATETIME_INDEX_LEN = 8; private static final int VARCHAR_INDEX_LEN = 20; + private static final int STRING_INDEX_LEN = 20; private static final int DECIMAL_INDEX_LEN = 12; private static ImmutableSetMultimap implicitCastMap; @@ -83,6 +85,7 @@ public enum PrimitiveType { builder.put(NULL_TYPE, DECIMALV2); builder.put(NULL_TYPE, CHAR); builder.put(NULL_TYPE, VARCHAR); + builder.put(NULL_TYPE, STRING); builder.put(NULL_TYPE, BITMAP); builder.put(NULL_TYPE, TIME); // Boolean @@ -98,6 +101,7 @@ public enum PrimitiveType { builder.put(BOOLEAN, DATETIME); builder.put(BOOLEAN, DECIMALV2); builder.put(BOOLEAN, VARCHAR); + builder.put(BOOLEAN, STRING); // Tinyint builder.put(TINYINT, BOOLEAN); builder.put(TINYINT, TINYINT); @@ -111,6 +115,7 @@ public enum PrimitiveType { builder.put(TINYINT, DATETIME); builder.put(TINYINT, DECIMALV2); builder.put(TINYINT, VARCHAR); + 
builder.put(TINYINT, STRING); // Smallint builder.put(SMALLINT, BOOLEAN); builder.put(SMALLINT, TINYINT); @@ -124,6 +129,7 @@ public enum PrimitiveType { builder.put(SMALLINT, DATETIME); builder.put(SMALLINT, DECIMALV2); builder.put(SMALLINT, VARCHAR); + builder.put(SMALLINT, STRING); // Int builder.put(INT, BOOLEAN); builder.put(INT, TINYINT); @@ -137,6 +143,7 @@ public enum PrimitiveType { builder.put(INT, DATETIME); builder.put(INT, DECIMALV2); builder.put(INT, VARCHAR); + builder.put(INT, STRING); // Bigint builder.put(BIGINT, BOOLEAN); builder.put(BIGINT, TINYINT); @@ -150,6 +157,7 @@ public enum PrimitiveType { builder.put(BIGINT, DATETIME); builder.put(BIGINT, DECIMALV2); builder.put(BIGINT, VARCHAR); + builder.put(BIGINT, STRING); // Largeint builder.put(LARGEINT, BOOLEAN); builder.put(LARGEINT, TINYINT); @@ -163,6 +171,7 @@ public enum PrimitiveType { builder.put(LARGEINT, DATETIME); builder.put(LARGEINT, DECIMALV2); builder.put(LARGEINT, VARCHAR); + builder.put(LARGEINT, STRING); // Float builder.put(FLOAT, BOOLEAN); builder.put(FLOAT, TINYINT); @@ -176,6 +185,7 @@ public enum PrimitiveType { builder.put(FLOAT, DATETIME); builder.put(FLOAT, DECIMALV2); builder.put(FLOAT, VARCHAR); + builder.put(FLOAT, STRING); // Double builder.put(DOUBLE, BOOLEAN); builder.put(DOUBLE, TINYINT); @@ -189,6 +199,7 @@ public enum PrimitiveType { builder.put(DOUBLE, DATETIME); builder.put(DOUBLE, DECIMALV2); builder.put(DOUBLE, VARCHAR); + builder.put(DOUBLE, STRING); // Date builder.put(DATE, BOOLEAN); builder.put(DATE, TINYINT); @@ -202,6 +213,7 @@ public enum PrimitiveType { builder.put(DATE, DATETIME); builder.put(DATE, DECIMALV2); builder.put(DATE, VARCHAR); + builder.put(DATE, STRING); // Datetime builder.put(DATETIME, BOOLEAN); builder.put(DATETIME, TINYINT); @@ -215,9 +227,11 @@ public enum PrimitiveType { builder.put(DATETIME, DATETIME); builder.put(DATETIME, DECIMALV2); builder.put(DATETIME, VARCHAR); + builder.put(DATETIME, STRING); // Char builder.put(CHAR, CHAR); 
builder.put(CHAR, VARCHAR); + builder.put(CHAR, STRING); // Varchar builder.put(VARCHAR, BOOLEAN); builder.put(VARCHAR, TINYINT); @@ -231,9 +245,27 @@ public enum PrimitiveType { builder.put(VARCHAR, DATETIME); builder.put(VARCHAR, DECIMALV2); builder.put(VARCHAR, VARCHAR); + builder.put(VARCHAR, STRING); builder.put(VARCHAR, HLL); builder.put(VARCHAR, BITMAP); + // String + builder.put(STRING, BOOLEAN); + builder.put(STRING, TINYINT); + builder.put(STRING, SMALLINT); + builder.put(STRING, INT); + builder.put(STRING, BIGINT); + builder.put(STRING, LARGEINT); + builder.put(STRING, FLOAT); + builder.put(STRING, DOUBLE); + builder.put(STRING, DATE); + builder.put(STRING, DATETIME); + builder.put(STRING, DECIMALV2); + builder.put(STRING, VARCHAR); + builder.put(STRING, STRING); + builder.put(STRING, HLL); + builder.put(STRING, BITMAP); + // DecimalV2 builder.put(DECIMALV2, BOOLEAN); builder.put(DECIMALV2, TINYINT); @@ -245,14 +277,17 @@ public enum PrimitiveType { builder.put(DECIMALV2, DOUBLE); builder.put(DECIMALV2, DECIMALV2); builder.put(DECIMALV2, VARCHAR); - + builder.put(DECIMALV2, STRING); + // HLL builder.put(HLL, HLL); builder.put(HLL, VARCHAR); + builder.put(HLL, STRING); // BITMAP builder.put(BITMAP, BITMAP); builder.put(BITMAP, VARCHAR); + builder.put(BITMAP, STRING); //TIME builder.put(TIME, TIME); @@ -294,6 +329,7 @@ public enum PrimitiveType { supportedTypes.add(FLOAT); supportedTypes.add(DOUBLE); supportedTypes.add(VARCHAR); + supportedTypes.add(STRING); supportedTypes.add(HLL); supportedTypes.add(CHAR); supportedTypes.add(DATE); @@ -336,7 +372,7 @@ public enum PrimitiveType { private static PrimitiveType[][] compatibilityMatrix; static { - compatibilityMatrix = new PrimitiveType[BINARY.ordinal() + 1][BINARY.ordinal() + 1]; + compatibilityMatrix = new PrimitiveType[PrimitiveType.values().length][PrimitiveType.values().length]; // NULL_TYPE is compatible with any type and results in the non-null type. 
compatibilityMatrix[NULL_TYPE.ordinal()][NULL_TYPE.ordinal()] = NULL_TYPE; @@ -352,6 +388,7 @@ public enum PrimitiveType { compatibilityMatrix[NULL_TYPE.ordinal()][DATETIME.ordinal()] = DATETIME; compatibilityMatrix[NULL_TYPE.ordinal()][CHAR.ordinal()] = CHAR; compatibilityMatrix[NULL_TYPE.ordinal()][VARCHAR.ordinal()] = VARCHAR; + compatibilityMatrix[NULL_TYPE.ordinal()][STRING.ordinal()] = STRING; compatibilityMatrix[NULL_TYPE.ordinal()][DECIMALV2.ordinal()] = DECIMALV2; compatibilityMatrix[NULL_TYPE.ordinal()][TIME.ordinal()] = TIME; compatibilityMatrix[NULL_TYPE.ordinal()][BITMAP.ordinal()] = BITMAP; @@ -368,6 +405,7 @@ public enum PrimitiveType { compatibilityMatrix[BOOLEAN.ordinal()][DATETIME.ordinal()] = INVALID_TYPE; compatibilityMatrix[BOOLEAN.ordinal()][CHAR.ordinal()] = INVALID_TYPE; compatibilityMatrix[BOOLEAN.ordinal()][VARCHAR.ordinal()] = INVALID_TYPE; + compatibilityMatrix[BOOLEAN.ordinal()][STRING.ordinal()] = INVALID_TYPE; compatibilityMatrix[BOOLEAN.ordinal()][DECIMALV2.ordinal()] = DECIMALV2; compatibilityMatrix[BOOLEAN.ordinal()][TIME.ordinal()] = TIME; @@ -382,6 +420,7 @@ public enum PrimitiveType { compatibilityMatrix[TINYINT.ordinal()][DATETIME.ordinal()] = INVALID_TYPE; compatibilityMatrix[TINYINT.ordinal()][CHAR.ordinal()] = INVALID_TYPE; compatibilityMatrix[TINYINT.ordinal()][VARCHAR.ordinal()] = INVALID_TYPE; + compatibilityMatrix[TINYINT.ordinal()][STRING.ordinal()] = INVALID_TYPE; compatibilityMatrix[TINYINT.ordinal()][DECIMALV2.ordinal()] = DECIMALV2; compatibilityMatrix[TINYINT.ordinal()][TIME.ordinal()] = TIME; @@ -395,6 +434,7 @@ public enum PrimitiveType { compatibilityMatrix[SMALLINT.ordinal()][DATETIME.ordinal()] = INVALID_TYPE; compatibilityMatrix[SMALLINT.ordinal()][CHAR.ordinal()] = INVALID_TYPE; compatibilityMatrix[SMALLINT.ordinal()][VARCHAR.ordinal()] = INVALID_TYPE; + compatibilityMatrix[SMALLINT.ordinal()][STRING.ordinal()] = INVALID_TYPE; compatibilityMatrix[SMALLINT.ordinal()][DECIMALV2.ordinal()] = DECIMALV2; 
compatibilityMatrix[SMALLINT.ordinal()][TIME.ordinal()] = TIME; @@ -407,6 +447,7 @@ public enum PrimitiveType { compatibilityMatrix[INT.ordinal()][DATETIME.ordinal()] = INVALID_TYPE; compatibilityMatrix[INT.ordinal()][CHAR.ordinal()] = INVALID_TYPE; compatibilityMatrix[INT.ordinal()][VARCHAR.ordinal()] = INVALID_TYPE; + compatibilityMatrix[INT.ordinal()][STRING.ordinal()] = INVALID_TYPE; compatibilityMatrix[INT.ordinal()][DECIMALV2.ordinal()] = DECIMALV2; compatibilityMatrix[INT.ordinal()][TIME.ordinal()] = TIME; @@ -418,6 +459,7 @@ public enum PrimitiveType { compatibilityMatrix[BIGINT.ordinal()][DATETIME.ordinal()] = INVALID_TYPE; compatibilityMatrix[BIGINT.ordinal()][CHAR.ordinal()] = INVALID_TYPE; compatibilityMatrix[BIGINT.ordinal()][VARCHAR.ordinal()] = INVALID_TYPE; + compatibilityMatrix[BIGINT.ordinal()][STRING.ordinal()] = INVALID_TYPE; compatibilityMatrix[BIGINT.ordinal()][DECIMALV2.ordinal()] = DECIMALV2; compatibilityMatrix[BIGINT.ordinal()][TIME.ordinal()] = TIME; @@ -428,6 +470,7 @@ public enum PrimitiveType { compatibilityMatrix[LARGEINT.ordinal()][DATETIME.ordinal()] = INVALID_TYPE; compatibilityMatrix[LARGEINT.ordinal()][CHAR.ordinal()] = INVALID_TYPE; compatibilityMatrix[LARGEINT.ordinal()][VARCHAR.ordinal()] = INVALID_TYPE; + compatibilityMatrix[LARGEINT.ordinal()][STRING.ordinal()] = INVALID_TYPE; compatibilityMatrix[LARGEINT.ordinal()][DECIMALV2.ordinal()] = DECIMALV2; compatibilityMatrix[LARGEINT.ordinal()][TIME.ordinal()] = TIME; @@ -437,6 +480,7 @@ public enum PrimitiveType { compatibilityMatrix[FLOAT.ordinal()][DATETIME.ordinal()] = INVALID_TYPE; compatibilityMatrix[FLOAT.ordinal()][CHAR.ordinal()] = INVALID_TYPE; compatibilityMatrix[FLOAT.ordinal()][VARCHAR.ordinal()] = INVALID_TYPE; + compatibilityMatrix[FLOAT.ordinal()][STRING.ordinal()] = INVALID_TYPE; compatibilityMatrix[FLOAT.ordinal()][DECIMALV2.ordinal()] = DECIMALV2; compatibilityMatrix[FLOAT.ordinal()][TIME.ordinal()] = TIME; @@ -445,6 +489,7 @@ public enum PrimitiveType { 
compatibilityMatrix[DOUBLE.ordinal()][DATETIME.ordinal()] = INVALID_TYPE; compatibilityMatrix[DOUBLE.ordinal()][CHAR.ordinal()] = INVALID_TYPE; compatibilityMatrix[DOUBLE.ordinal()][VARCHAR.ordinal()] = INVALID_TYPE; + compatibilityMatrix[DOUBLE.ordinal()][STRING.ordinal()] = INVALID_TYPE; compatibilityMatrix[DOUBLE.ordinal()][DECIMALV2.ordinal()] = DECIMALV2; compatibilityMatrix[DOUBLE.ordinal()][TIME.ordinal()] = TIME; @@ -452,24 +497,32 @@ public enum PrimitiveType { compatibilityMatrix[DATE.ordinal()][DATETIME.ordinal()] = DATETIME; compatibilityMatrix[DATE.ordinal()][CHAR.ordinal()] = INVALID_TYPE; compatibilityMatrix[DATE.ordinal()][VARCHAR.ordinal()] = INVALID_TYPE; + compatibilityMatrix[DATE.ordinal()][STRING.ordinal()] = INVALID_TYPE; compatibilityMatrix[DATE.ordinal()][DECIMALV2.ordinal()] = INVALID_TYPE; compatibilityMatrix[DATE.ordinal()][TIME.ordinal()] = INVALID_TYPE; compatibilityMatrix[DATETIME.ordinal()][DATETIME.ordinal()] = DATETIME; compatibilityMatrix[DATETIME.ordinal()][CHAR.ordinal()] = INVALID_TYPE; compatibilityMatrix[DATETIME.ordinal()][VARCHAR.ordinal()] = INVALID_TYPE; + compatibilityMatrix[DATETIME.ordinal()][STRING.ordinal()] = INVALID_TYPE; compatibilityMatrix[DATETIME.ordinal()][DECIMALV2.ordinal()] = INVALID_TYPE; compatibilityMatrix[DATETIME.ordinal()][TIME.ordinal()] = INVALID_TYPE; compatibilityMatrix[CHAR.ordinal()][CHAR.ordinal()] = CHAR; compatibilityMatrix[CHAR.ordinal()][VARCHAR.ordinal()] = VARCHAR; + compatibilityMatrix[CHAR.ordinal()][STRING.ordinal()] = STRING; compatibilityMatrix[CHAR.ordinal()][DECIMALV2.ordinal()] = INVALID_TYPE; compatibilityMatrix[CHAR.ordinal()][TIME.ordinal()] = INVALID_TYPE; compatibilityMatrix[VARCHAR.ordinal()][VARCHAR.ordinal()] = VARCHAR; + compatibilityMatrix[VARCHAR.ordinal()][STRING.ordinal()] = STRING; compatibilityMatrix[VARCHAR.ordinal()][DECIMALV2.ordinal()] = INVALID_TYPE; compatibilityMatrix[VARCHAR.ordinal()][TIME.ordinal()] = INVALID_TYPE; + 
compatibilityMatrix[STRING.ordinal()][STRING.ordinal()] = STRING; + compatibilityMatrix[STRING.ordinal()][DECIMALV2.ordinal()] = INVALID_TYPE; + compatibilityMatrix[STRING.ordinal()][TIME.ordinal()] = INVALID_TYPE; + compatibilityMatrix[DECIMALV2.ordinal()][DECIMALV2.ordinal()] = DECIMALV2; compatibilityMatrix[DECIMALV2.ordinal()][TIME.ordinal()] = INVALID_TYPE; @@ -514,8 +567,6 @@ public enum PrimitiveType { public static PrimitiveType fromThrift(TPrimitiveType tPrimitiveType) { switch (tPrimitiveType) { - case INVALID_TYPE: - return INVALID_TYPE; case NULL_TYPE: return NULL_TYPE; case BOOLEAN: @@ -546,6 +597,8 @@ public enum PrimitiveType { return TIME; case VARCHAR: return VARCHAR; + case STRING: + return STRING; case CHAR: return CHAR; case HLL: @@ -651,11 +704,11 @@ public enum PrimitiveType { } public boolean isStringType() { - return (this == VARCHAR || this == CHAR || this == HLL); + return (this == VARCHAR || this == CHAR || this == HLL || this == STRING); } public boolean isCharFamily() { - return (this == VARCHAR || this == CHAR); + return (this == VARCHAR || this == CHAR || this == STRING); } public boolean isIntegerType() { @@ -709,6 +762,8 @@ public enum PrimitiveType { case CHAR: // char index size is length return -1; + case STRING: + return STRING_INDEX_LEN; case DECIMALV2: return DECIMAL_INDEX_LEN; default: diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java index 3f60a594ab..5f216d3a98 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java @@ -161,6 +161,7 @@ public class ScalarFunction extends Function { case VARCHAR: case HLL: case BITMAP: + case STRING: beFn += "_string_val"; break; case DATE: diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java 
index 2e0c91f076..fd2155786f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java @@ -51,6 +51,10 @@ public class ScalarType extends Type { // Longest supported VARCHAR and CHAR, chosen to match Hive. public static final int MAX_VARCHAR_LENGTH = 65533; + + // Longest supported STRING: Integer.MAX_VALUE (2GB - 1) minus 4 bytes reserved for storing the string length. + public static final int MAX_STRING_LENGTH = 2147483643; + public static final int MAX_CHAR_LENGTH = 255; // HLL DEFAULT LENGTH 2^14(registers) + 1(type) @@ -62,7 +66,6 @@ public class ScalarType extends Type { // Hive, mysql, sql server standard. public static final int MAX_PRECISION = 38; - public static final int MAX_SCALE = MAX_PRECISION; @SerializedName(value = "type") private final PrimitiveType type; @@ -92,6 +95,8 @@ public class ScalarType extends Type { return createCharType(len); case VARCHAR: return createVarcharType(len); + case STRING: + return createStringType(); case DECIMALV2: return createDecimalV2Type(precision, scale); default: @@ -123,6 +128,8 @@ public class ScalarType extends Type { return CHAR; case VARCHAR: return createVarcharType(); + case STRING: + return createStringType(); case HLL: return createHllType(); case BITMAP: @@ -168,6 +175,8 @@ public class ScalarType extends Type { return CHAR; case "VARCHAR": return createVarcharType(); + case "STRING": + return createStringType(); case "HLL": return createHllType(); case "BITMAP": @@ -233,6 +242,13 @@ public class ScalarType extends Type { return type; } + public static ScalarType createStringType() { + // STRING takes no declared length; len is set to -1 + ScalarType type = new ScalarType(PrimitiveType.STRING); + type.len = -1; + return type; + } + public static ScalarType createVarchar(int len) { // length checked in analysis ScalarType type = new ScalarType(PrimitiveType.VARCHAR); @@ -267,6 +283,8 @@ public class ScalarType extends Type { return "VARCHAR(*)"; } return "VARCHAR(" + len + ")"; + } else if (type == 
PrimitiveType.STRING) { + return "STRING"; } return type.toString(); } @@ -301,6 +319,7 @@ public class ScalarType extends Type { case DATE: case DATETIME: case HLL: + case STRING: case BITMAP: stringBuilder.append(type.toString().toLowerCase()); break; @@ -323,42 +342,27 @@ public class ScalarType extends Type { public void toThrift(TTypeDesc container) { TTypeNode node = new TTypeNode(); container.types.add(node); + node.setType(TTypeNodeType.SCALAR); + TScalarType scalarType = new TScalarType(); + scalarType.setType(type.toThrift()); + switch(type) { case VARCHAR: case CHAR: - case HLL: { - node.setType(TTypeNodeType.SCALAR); - TScalarType scalarType = new TScalarType(); - scalarType.setType(type.toThrift()); + case HLL: + case STRING: { scalarType.setLen(len); - node.setScalarType(scalarType); break; } case DECIMALV2: { - node.setType(TTypeNodeType.SCALAR); - TScalarType scalarType = new TScalarType(); - scalarType.setType(type.toThrift()); scalarType.setScale(scale); scalarType.setPrecision(precision); - node.setScalarType(scalarType); break; } - default: { - node.setType(TTypeNodeType.SCALAR); - TScalarType scalarType = new TScalarType(); - scalarType.setType(type.toThrift()); - node.setScalarType(scalarType); + default: break; - } } - } - - public static Type[] toColumnType(PrimitiveType[] types) { - Type result[] = new Type[types.length]; - for (int i = 0; i < types.length; ++i) { - result[i] = createType(types[i]); - } - return result; + node.setScalarType(scalarType); } public int decimalPrecision() { @@ -599,6 +603,9 @@ public class ScalarType extends Type { } if (t1.isStringType() || t2.isStringType()) { + if (t1.type == PrimitiveType.STRING || t2.type == PrimitiveType.STRING) { + return createStringType(); + } return createVarcharType(Math.max(t1.len, t2.len)); } @@ -668,6 +675,8 @@ public class ScalarType extends Type { return 16385; case BITMAP: return 1024; // this is a estimated value + case STRING: + return 1024; default: return 0; } diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/SchemaTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/SchemaTable.java index ade3ef356b..abd44d1173 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/SchemaTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/SchemaTable.java @@ -38,7 +38,6 @@ import java.util.Map; public class SchemaTable extends Table { private final static int FN_REFLEN = 512; private final static int NAME_CHAR_LEN = 64; - private final static int MAX_FIELD_VARCHARLENGTH = 65535; private final static int MY_CS_NAME_SIZE = 32; private final static int GRANTEE_len = 81; private final static int PRIVILEGE_TYPE_LEN = 64; @@ -49,11 +48,6 @@ public class SchemaTable extends Table { super(id, name, type, baseSchema); } - protected SchemaTable(long id, String name, SchemaTableType type) { - super(TableType.SCHEMA); - schemaTableType = type; - } - @Override public void write(DataOutput out) throws IOException { throw new UnsupportedOperationException("Do not allow to write SchemaTable to image."); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java index 541f838225..03e5b5c01a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java @@ -65,6 +65,7 @@ public abstract class Type { public static final ScalarType DATE = new ScalarType(PrimitiveType.DATE); public static final ScalarType DATETIME = new ScalarType(PrimitiveType.DATETIME); public static final ScalarType TIME = new ScalarType(PrimitiveType.TIME); + public static final ScalarType STRING = new ScalarType(PrimitiveType.STRING); public static final ScalarType DEFAULT_DECIMALV2 = (ScalarType) ScalarType.createDecimalV2Type(ScalarType.DEFAULT_PRECISION, ScalarType.DEFAULT_SCALE); @@ -117,6 +118,8 @@ public abstract class Type { supportedTypes.add(DATETIME); supportedTypes.add(DECIMALV2); 
supportedTypes.add(TIME); + supportedTypes.add(STRING); + } public static ArrayList getIntegerTypes() { @@ -177,7 +180,9 @@ public abstract class Type { public boolean isWildcardChar() { return false; } public boolean isStringType() { - return isScalarType(PrimitiveType.VARCHAR) || isScalarType(PrimitiveType.CHAR); + return isScalarType(PrimitiveType.VARCHAR) + || isScalarType(PrimitiveType.CHAR) + || isScalarType(PrimitiveType.STRING); } // only metric types have the following constraint: @@ -487,6 +492,8 @@ public abstract class Type { return Type.CHAR; case VARCHAR: return Type.VARCHAR; + case STRING: + return Type.STRING; case HLL: return Type.HLL; case ARRAY: @@ -617,6 +624,7 @@ public abstract class Type { switch (t.getPrimitiveType()) { case CHAR: case VARCHAR: + case STRING: case HLL: return t.getLength(); default: @@ -759,6 +767,8 @@ public abstract class Type { compatibilityMatrix[BOOLEAN.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[BOOLEAN.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; compatibilityMatrix[BOOLEAN.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[BOOLEAN.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE; + // TINYINT compatibilityMatrix[TINYINT.ordinal()][SMALLINT.ordinal()] = PrimitiveType.SMALLINT; @@ -776,6 +786,7 @@ public abstract class Type { compatibilityMatrix[TINYINT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[TINYINT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; compatibilityMatrix[TINYINT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[TINYINT.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE; // SMALLINT compatibilityMatrix[SMALLINT.ordinal()][INT.ordinal()] = PrimitiveType.INT; @@ -792,6 +803,7 @@ public abstract class Type { compatibilityMatrix[SMALLINT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[SMALLINT.ordinal()][TIME.ordinal()] = 
PrimitiveType.DOUBLE; compatibilityMatrix[SMALLINT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[SMALLINT.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE; // INT compatibilityMatrix[INT.ordinal()][BIGINT.ordinal()] = PrimitiveType.BIGINT; @@ -811,6 +823,7 @@ public abstract class Type { compatibilityMatrix[INT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[INT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; compatibilityMatrix[INT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[INT.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE; // BIGINT // 64 bit integer does not fit in mantissa of double or float. @@ -831,6 +844,7 @@ public abstract class Type { compatibilityMatrix[BIGINT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[BIGINT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; compatibilityMatrix[BIGINT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[BIGINT.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE; // LARGEINT compatibilityMatrix[LARGEINT.ordinal()][FLOAT.ordinal()] = PrimitiveType.DOUBLE; @@ -843,6 +857,7 @@ public abstract class Type { compatibilityMatrix[LARGEINT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[LARGEINT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; compatibilityMatrix[LARGEINT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[LARGEINT.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE; // FLOAT compatibilityMatrix[FLOAT.ordinal()][DOUBLE.ordinal()] = PrimitiveType.DOUBLE; @@ -854,6 +869,7 @@ public abstract class Type { compatibilityMatrix[FLOAT.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[FLOAT.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; compatibilityMatrix[FLOAT.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + 
compatibilityMatrix[FLOAT.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE; // DOUBLE compatibilityMatrix[DOUBLE.ordinal()][DATE.ordinal()] = PrimitiveType.INVALID_TYPE; @@ -864,6 +880,7 @@ public abstract class Type { compatibilityMatrix[DOUBLE.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[DOUBLE.ordinal()][TIME.ordinal()] = PrimitiveType.DOUBLE; compatibilityMatrix[DOUBLE.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[DOUBLE.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE; // DATE compatibilityMatrix[DATE.ordinal()][DATETIME.ordinal()] = PrimitiveType.DATETIME; @@ -873,6 +890,7 @@ public abstract class Type { compatibilityMatrix[DATE.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[DATE.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[DATE.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[DATE.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE; // DATETIME compatibilityMatrix[DATETIME.ordinal()][CHAR.ordinal()] = PrimitiveType.INVALID_TYPE; @@ -881,7 +899,8 @@ public abstract class Type { compatibilityMatrix[DATETIME.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[DATETIME.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[DATETIME.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; - + compatibilityMatrix[DATETIME.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE; + // We can convert some but not all string values to timestamps. 
// CHAR compatibilityMatrix[CHAR.ordinal()][VARCHAR.ordinal()] = PrimitiveType.VARCHAR; @@ -889,24 +908,40 @@ public abstract class Type { compatibilityMatrix[CHAR.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[CHAR.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[CHAR.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[CHAR.ordinal()][STRING.ordinal()] = PrimitiveType.STRING; // VARCHAR compatibilityMatrix[VARCHAR.ordinal()][DECIMALV2.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[VARCHAR.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[VARCHAR.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[VARCHAR.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[VARCHAR.ordinal()][STRING.ordinal()] = PrimitiveType.STRING; + + //String + compatibilityMatrix[STRING.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[STRING.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[STRING.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + // DECIMALV2 compatibilityMatrix[DECIMALV2.ordinal()][HLL.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[DECIMALV2.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[DECIMALV2.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[DECIMALV2.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE; + // HLL compatibilityMatrix[HLL.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; compatibilityMatrix[HLL.ordinal()][BITMAP.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[HLL.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE; + // BITMAP compatibilityMatrix[BITMAP.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; + compatibilityMatrix[BITMAP.ordinal()][STRING.ordinal()] = PrimitiveType.INVALID_TYPE; + + // TIME 
+ compatibilityMatrix[TIME.ordinal()][TIME.ordinal()] = PrimitiveType.INVALID_TYPE; // Check all of the necessary entries that should be filled. // ignore binary @@ -929,7 +964,6 @@ public abstract class Type { } } - public Type getResultType() { switch (this.getPrimitiveType()) { case BOOLEAN: @@ -953,6 +987,8 @@ public abstract class Type { return VARCHAR; case DECIMALV2: return DECIMALV2; + case STRING: + return STRING; default: return INVALID; @@ -1033,6 +1069,7 @@ public abstract class Type { case TIME: case CHAR: case VARCHAR: + case STRING: case HLL: return Type.DOUBLE; case DECIMALV2: diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java index 6a71920d8a..557c2a6ece 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java @@ -68,6 +68,7 @@ public class Util { TYPE_STRING_MAP.put(PrimitiveType.DATETIME, "datetime"); TYPE_STRING_MAP.put(PrimitiveType.CHAR, "char(%d)"); TYPE_STRING_MAP.put(PrimitiveType.VARCHAR, "varchar(%d)"); + TYPE_STRING_MAP.put(PrimitiveType.STRING, "string"); TYPE_STRING_MAP.put(PrimitiveType.DECIMALV2, "decimal(%d,%d)"); TYPE_STRING_MAP.put(PrimitiveType.HLL, "varchar(%d)"); TYPE_STRING_MAP.put(PrimitiveType.BOOLEAN, "bool"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/HttpServer.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/HttpServer.java index 0a1b6543aa..d52c73cc5e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/HttpServer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/HttpServer.java @@ -86,6 +86,7 @@ public class HttpServer extends SpringBootServletInitializer { // To avoid some unexpected behavior. 
System.setProperty("spring.devtools.restart.enabled", "false"); System.setProperty("spring.http.multipart.location", PaloFe.DORIS_HOME_DIR); + System.setProperty("spring.banner.image.location", "doris-logo.png"); properties.put("logging.config", Config.custom_config_dir + "/" + SpringLog4j2Config.SPRING_LOG_XML_FILE); new SpringApplicationBuilder() .sources(HttpServer.class) diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index 7916dca932..cc2ff1e193 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -524,7 +524,7 @@ public class OlapScanNode extends ScanNode { scanBackendIds.add(backend.getId()); } if (tabletIsNull) { - throw new UserException(tabletId + "have no alive replicas"); + throw new UserException("tablet: " + tabletId + " have no alive replicas."); } TScanRange scanRange = new TScanRange(); scanRange.setPaloScanRange(paloRange); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/cache/PartitionRange.java b/fe/fe-core/src/main/java/org/apache/doris/qe/cache/PartitionRange.java index 073ffd7501..91e393ea1c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/cache/PartitionRange.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/cache/PartitionRange.java @@ -174,6 +174,7 @@ public class PartitionRange { case DECIMALV2: case CHAR: case VARCHAR: + case STRING: case LARGEINT: LOG.info("PartitionCache not support such key type {}", type.toSql()); return false; diff --git a/fe/fe-core/src/main/resources/doris-logo.png b/fe/fe-core/src/main/resources/doris-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..c411a23bc73c07ead646bff4d611a6878d58877e GIT binary patch literal 3304 zcmeAS@N?(olHy`uVBq!ia0y~yV1CZPz!=BKz`($8{(f*P0|Ns~x}&cn1H;CC?mvmF 
z3=9kk$sR$z3=CCj3=9n|3=F>*7#JE}Fff!FFfhDIU|_JC!N4G1FlSew4FdxMQi`0|WPCPZ!6Kid%2* zdT(33OOP#L&z%J;6*DxXW}Vp8Yhv8u)7qoL)2PNPmU3HR<*E?Xgxh*^r)HO*ySc|s z?iVYEquj2nc{&mS@-8?&!vwP>=`%_b+TU+;H+cZ17lB2Qx zU;izfsY=yiTkl z?mCyvh3QMOS?68f%5?s9R3rN@o|5>`@>joDN}@x}Lra$)V6J7^74(2LR`WyCI*|&` z@1dp3*YXE!4=oK2z0SF3$!nIlOQlTuFRdEIL1u@Rg0)RO(7ns+forVlhehjzDqixe z7Osda4=w#|eyw+%U1IFwT|a|Mdr$f8XkNE2J7jBp&%T7%#f$zYf0_E?-}@!y^^@L} zeBHOo?oV&&-_zXRG8gQ+_jApn>zAh=4_vqGeE;bkOXvODF=5qG|MT86*UkF3W5x^Z zmybV}*j>47yEbpj*Ol|~G9QGNIy3)Y{6F0HJk$Th|GgnpuHo)X zd-r*Nnq94XBS?$qgZgXFE?@uRV6O6k|L)wl3l;yqCHI7{aai71|Ki)9usM186?sX^ zE+6O>)o_p9M9B$xqNZEQYd4!Y4PT~E>C{; z>%w;z9k@TgzB14};lZT_UZ>!@MtcQIoU5&U&#X<^7GrYZGGn606>-nwjrsFWZ&<`v z?ya*%Z}usn+DRocKg?o2dRRqlw3vQ!nURaUsx14xBiVe8?AmPn;xkvN{!(~p@!{Fe zGE=#{HM(wEvDHp2bv{<+D`&akPQ;sCIT73X)JsHu#F}rCdwo?+nr+!8 zkNK&xtab5O+OHR0IIO*B_lEW+yOUNte{$nr#a?sX^Wf@(w>|&nC|^!|@P2pQlB>$s3x0pE1zWOcUly|}&bMbX_QOgWCCM_zbq z1+0vE@pwZ)-n$j|jwj7{t?)rl#NqI(%x3eu?Mt_0tZKZt{cL2BEX%$24_o)lTVnd| zp5^7^0XdC)e5%(RA3Xm5ZjasZK<~+Ue1|r09+~|r?o#lB{6c*Wt9y?^nDTB~Rjnvj zy?fY8{A%3Hd+ldu6~1&2*6rCh?Te<&Th+Jxe)0`0ujl6Q&%K_@vB&e|_dRw^tcR_x z%fB#>G*6tIyXvdV`oqp#=gs+iufApWdujNA?=8Pye3Qw8V%=-MwX1mc_@`SReyx4s zVQJgny>iU^ml@Y;zmQ#Pb8y4`mx>=W-yN@F=ktpYp3%d-%YXS%gBN-)9A7ycl!?2h zE>j+KSH;$Y_u6go66x%7C0o^ABwMZiZBVx4SB0nfZMWzO&eAo9H-2Z{8*sh#fOSRn zlgip#%iM=#Y?uc-hJ19_&K-Ljaly^ z_KTN%mPor<9lEOgA>Xqmy9K|LFVAoMzQT0Ln*J3sU!xZ#_`Y8GF6FP|-*nftSGSwL zQr0>;v(CHQ^3w14SLKoX?y5&+!@ta#SC)F=!By5v>LI1if7z|FT>86&uXmU7uURj@ zdBopaQtmQ8(D2;XT)kf@eV5x8G=F^DxNiaLvt67gqD!jS_cAqKj<$LrT6^*f-@9Ki z``cCbdmevlRy}F&lKTf-ejlIuMQ-Y;SjO@TF7gf!lfJ0EUBFz+<9qUF#)Ybe+piDs zTCM*bVG(z|-N*erFKcb(~wSywdwg zc_;7dzhGX>|KI@kuMd2(VPc;14;ZoTt#7tFe95+{XZaECs_$lV?F6|`=W$nkHv2p8 zrL)wz*QbAfF}r+F@S(8zet+|sd|t}&*L~k#5Vz33^4p^6`tkA?+8i>MmPhRQQGDlv zneF?Z8=78A|NC=q{-ySUy6>4^IKJqKR<8NJ(Bji|v%9Rd0_$^sg}m5c$J)Ms-Qgek z>(^8nXkQACH2=B1t!^d%12vx4f9qOqzW(|1`!9ws6W^zcT7;EH-cj9azhio%(lznG z?~!-vtJPU=i*As+=W*)ZVZP;GOO{-`cw^bDqqg!~D%Jn*#a}6`v|8MMTQTjnp2wZ~ 
z+hrRYc84zVZ#sA6Z`Zwp*O(0pA4APB+UlBv9qRZ{jry7<;q`XCY*m^TgNv$&2IVn$~)Y&V+yDRhFS{iY9QsNR zAG!Ns*Oli-npe67%3ItuD%`%uj!AZk?yA|`yOJNhtElDBc#+N0ep57TYfs+Oy0Db$aDzTUlX|Fz>e&!Tz{O_^CI^F90T>uGC_zPbN9Ny^Q>&-&GN_upd65*JLB zcwMi0SM*mXSKy0k&HkDiJW8PlaewugjFSa!k9fM1^%-y!RQ2(b>eU#!m1@9G0 z-tYB4Al$LRj_>u$g~7{qAKnrF+I!Bq`oQ~YHx_GtTP*e{?v?x9<%th=@?1TVKcNWUIm)691nChFzrL|vnEr^bBtY(QOxvsGCF%Z0&K;3`vB2fXmaE|2d4_stC6=ND_eyOf?-F7kcm!78zPKMx#>{nNAj{!fNm0^IL+Cp`Xl zkN?8mnLFxzzNbI>XIN`1{N4ZM^GE+Q|Hj<0U%Fr9`}ES!L3iS>uC2S%`0mT&xT^k} z&mYHCwcmdJc-|MOLjAqF{U2UE61ZOQ`z32>(YWKX_ro)_kL>-j&ad_R60v*1t3Jd% zIbe_u0ns~q2KSh->3YmdC;{h`0Cd4>pjkE-Rgg=8vBs- z3+vy{h3!^rpNZC$EL(Y96ALVzO;=X@>>=Sx7>}qW2-SDfX1s`RW|Fd4`5#G6b#p?fd zI+uJy?`FMz(RC~IYHesN>-2X@U+dXrURf1RFZp}FzCBlb*{ZzNul!=$-QwK6r*HZZU