From 4f744333c209080f20419c2c7a0ef60ca87275f2 Mon Sep 17 00:00:00 2001 From: Zhengguo Yang Date: Fri, 10 Sep 2021 09:52:03 +0800 Subject: [PATCH] fix some core dumps in local test: (#6594) 1. inserting a very large string value may coredump 2. some analytic function and agg function results may be incorrect 3. string compare may coredump when the string value is too large 4. string type in delete condition is not processed correctly 5. add text/blob as aliases of string to be compatible with MySQL 6. fix string type min/max agg that may be processed incorrectly --- be/src/exec/exec_node.h | 2 +- be/src/exec/partitioned_aggregation_node.cc | 30 ++--- be/src/exprs/runtime_filter.cpp | 2 + be/src/olap/aggregate_func.h | 1 - be/src/olap/bloom_filter_predicate.cpp | 3 +- be/src/olap/delete_handler.cpp | 3 +- be/src/olap/field.h | 1 - be/src/olap/push_handler.cpp | 105 +++++++++--------- be/src/runtime/buffered_tuple_stream3.cc | 14 +-- be/src/runtime/buffered_tuple_stream3.h | 6 - be/src/runtime/row_batch.cpp | 10 +- be/src/runtime/row_batch.h | 3 +- be/src/runtime/string_value.hpp | 25 +++-- be/src/runtime/string_value_ir.cpp | 2 +- be/src/util/sse2neo.h | 0 fe/fe-core/src/main/cup/sql_parser.cup | 12 +- .../doris/alter/SchemaChangeHandler.java | 17 --- .../doris/analysis/CreateTableStmt.java | 8 -- .../org/apache/doris/catalog/ColumnType.java | 7 +- .../org/apache/doris/catalog/FunctionSet.java | 12 +- .../apache/doris/catalog/PrimitiveType.java | 2 + .../org/apache/doris/catalog/ScalarType.java | 8 +- .../java/org/apache/doris/common/Config.java | 17 --- fe/fe-core/src/main/jflex/sql_scanner.flex | 2 + gensrc/thrift/PlanNodes.thrift | 3 +- 25 files changed, 130 insertions(+), 165 deletions(-) create mode 100644 be/src/util/sse2neo.h diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h index dfaeb2c0ca..fd6a7a1b82 100644 --- a/be/src/exec/exec_node.h +++ b/be/src/exec/exec_node.h @@ -48,7 +48,7 @@ class MemTracker; namespace vectorized { class Block; class VExpr; -} +} // namespace
vectorized using std::string; using std::stringstream; diff --git a/be/src/exec/partitioned_aggregation_node.cc b/be/src/exec/partitioned_aggregation_node.cc index 8f31ad80f3..088a1f2f00 100644 --- a/be/src/exec/partitioned_aggregation_node.cc +++ b/be/src/exec/partitioned_aggregation_node.cc @@ -276,8 +276,7 @@ Status PartitionedAggregationNode::open(RuntimeState* state) { if (!is_streaming_preagg_ && needs_serialize_) { serialize_stream_.reset(new BufferedTupleStream3( state, &intermediate_row_desc_, &_buffer_pool_client, - _resource_profile.spillable_buffer_size, - _resource_profile.max_row_buffer_size)); + _resource_profile.spillable_buffer_size)); RETURN_IF_ERROR(serialize_stream_->Init(id(), false)); bool got_buffer; // Reserve the memory for 'serialize_stream_' so we don't need to scrounge up @@ -349,7 +348,8 @@ Status PartitionedAggregationNode::get_next(RuntimeState* state, RowBatch* row_b // 3. `child(0)->rows_returned() == 0` mean not data from child // in level two aggregation node should return NULL result // level one aggregation node set `eos = true` return directly - if (UNLIKELY(grouping_exprs_.size() == 0 && !needs_finalize_ && child(0)->rows_returned() == 0)) { + if (UNLIKELY(grouping_exprs_.size() == 0 && !needs_finalize_ && + child(0)->rows_returned() == 0)) { *eos = true; return Status::OK(); } @@ -743,8 +743,7 @@ Status PartitionedAggregationNode::Partition::InitStreams() { aggregated_row_stream.reset(new BufferedTupleStream3( parent->state_, &parent->intermediate_row_desc_, &parent->_buffer_pool_client, - parent->_resource_profile.spillable_buffer_size, - parent->_resource_profile.max_row_buffer_size, external_varlen_slots)); + parent->_resource_profile.spillable_buffer_size, external_varlen_slots)); RETURN_IF_ERROR(aggregated_row_stream->Init(parent->id(), true)); bool got_buffer; RETURN_IF_ERROR(aggregated_row_stream->PrepareForWrite(&got_buffer)); @@ -755,8 +754,7 @@ Status PartitionedAggregationNode::Partition::InitStreams() { if 
(!parent->is_streaming_preagg_) { unaggregated_row_stream.reset(new BufferedTupleStream3( parent->state_, &(parent->child(0)->row_desc()), &parent->_buffer_pool_client, - parent->_resource_profile.spillable_buffer_size, - parent->_resource_profile.max_row_buffer_size)); + parent->_resource_profile.spillable_buffer_size)); // This stream is only used to spill, no need to ever have this pinned. RETURN_IF_ERROR(unaggregated_row_stream->Init(parent->id(), false)); // Save memory by waiting until we spill to allocate the write buffer for the @@ -827,8 +825,7 @@ Status PartitionedAggregationNode::Partition::SerializeStreamForSpilling() { // freed at least one buffer from this partition's (old) aggregated_row_stream. parent->serialize_stream_.reset(new BufferedTupleStream3( parent->state_, &parent->intermediate_row_desc_, &parent->_buffer_pool_client, - parent->_resource_profile.spillable_buffer_size, - parent->_resource_profile.max_row_buffer_size)); + parent->_resource_profile.spillable_buffer_size)); status = parent->serialize_stream_->Init(parent->id(), false); if (status.ok()) { bool got_buffer; @@ -926,10 +923,15 @@ Tuple* PartitionedAggregationNode::ConstructIntermediateTuple( const int tuple_data_size = fixed_size + varlen_size; uint8_t* tuple_data = pool->try_allocate(tuple_data_size); if (UNLIKELY(tuple_data == NULL)) { - string details = Substitute( - "Cannot perform aggregation at node with id $0. Failed " - "to allocate $1 bytes for intermediate tuple.", - _id, tuple_data_size); + stringstream str; + str << "Memory exceed limit. Cannot perform aggregation at node with id $0. Failed " + << "to allocate $1 bytes for intermediate tuple. " + << "Backend: " << BackendOptions::get_localhost() << ", " + << "fragment: " << print_id(state_->fragment_instance_id()) << " " + << "Used: " << pool->mem_tracker()->consumption() + << ", Limit: " << pool->mem_tracker()->limit() << ". 
" + << "You can change the limit by session variable exec_mem_limit."; + string details = Substitute(str.str(), _id, tuple_data_size); *status = pool->mem_tracker()->MemLimitExceeded(state_, details, tuple_data_size); return NULL; } @@ -1046,7 +1048,7 @@ Tuple* PartitionedAggregationNode::GetOutputTuple(const vectorrows_returned() == 0); + grouping_exprs_.size() == 0 && child(0)->rows_returned() == 0); } else { NewAggFnEvaluator::Serialize(agg_fn_evals, tuple); } diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index 574a837495..446eb10e0c 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -291,6 +291,8 @@ PrimitiveType to_primitive_type(PColumnType type) { return TYPE_VARCHAR; case PColumnType::COLUMN_TYPE_CHAR: return TYPE_CHAR; + case PColumnType::COLUMN_TYPE_STRING: + return TYPE_STRING; default: DCHECK(false); } diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h index 9233d1d8f2..afdaa8d0f4 100644 --- a/be/src/olap/aggregate_func.h +++ b/be/src/olap/aggregate_func.h @@ -411,7 +411,6 @@ template <> struct AggregateFuncTraits : public AggregateFuncTraits {}; - template <> struct AggregateFuncTraits : public AggregateFuncTraits {}; diff --git a/be/src/olap/bloom_filter_predicate.cpp b/be/src/olap/bloom_filter_predicate.cpp index 1a86b5dc9d..fca5a7e671 100644 --- a/be/src/olap/bloom_filter_predicate.cpp +++ b/be/src/olap/bloom_filter_predicate.cpp @@ -28,7 +28,8 @@ M(TYPE_CHAR) \ M(TYPE_DATE) \ M(TYPE_DATETIME) \ - M(TYPE_VARCHAR) + M(TYPE_VARCHAR) \ + M(TYPE_STRING) namespace doris { ColumnPredicate* BloomFilterColumnPredicateFactory::create_column_predicate( diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp index 2996979556..280d983b5f 100644 --- a/be/src/olap/delete_handler.cpp +++ b/be/src/olap/delete_handler.cpp @@ -141,8 +141,9 @@ bool DeleteConditionHandler::is_condition_value_valid(const TabletColumn& column return valid_decimal(value_str, 
column.precision(), column.frac()); case OLAP_FIELD_TYPE_CHAR: case OLAP_FIELD_TYPE_VARCHAR: - case OLAP_FIELD_TYPE_STRING: return value_str.size() <= column.length(); + case OLAP_FIELD_TYPE_STRING: + return value_str.size() <= OLAP_STRING_MAX_LENGTH; case OLAP_FIELD_TYPE_DATE: case OLAP_FIELD_TYPE_DATETIME: return valid_datetime(value_str); diff --git a/be/src/olap/field.h b/be/src/olap/field.h index f2f112998c..ffe0221c2d 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -91,7 +91,6 @@ public: if (dst_slice->size < src_slice->size) { *_long_text_buf = static_cast(realloc(*_long_text_buf, src_slice->size)); dst_slice->data = *_long_text_buf; - dst_slice->size = src_slice->size; } } _agg_info->update(dest, src, mem_pool); diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 0b9d3ac1b4..d907aeaceb 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -944,65 +944,66 @@ OLAPStatus PushBrokerReader::init(const Schema* schema, const TBrokerScanRange& return OLAP_SUCCESS; } -OLAPStatus PushBrokerReader::fill_field_row(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, FieldType type){ +OLAPStatus PushBrokerReader::fill_field_row(RowCursorCell* dst, const char* src, bool src_null, + MemPool* mem_pool, FieldType type) { switch (type) { - case OLAP_FIELD_TYPE_DECIMAL: { - dst->set_is_null(src_null); - if (src_null) { - break; - } - auto *decimal_value = reinterpret_cast(src); - auto *storage_decimal_value = reinterpret_cast(dst->mutable_cell_ptr()); - storage_decimal_value->integer = decimal_value->int_value(); - storage_decimal_value->fraction = decimal_value->frac_value(); + case OLAP_FIELD_TYPE_DECIMAL: { + dst->set_is_null(src_null); + if (src_null) { break; } - case OLAP_FIELD_TYPE_DATETIME: { - dst->set_is_null(src_null); - if (src_null) { - break; - } - - auto* datetime_value = reinterpret_cast(src); - auto* storage_datetime_value = reinterpret_cast(dst->mutable_cell_ptr()); - 
*storage_datetime_value = datetime_value->to_olap_datetime(); + auto* decimal_value = reinterpret_cast(src); + auto* storage_decimal_value = reinterpret_cast(dst->mutable_cell_ptr()); + storage_decimal_value->integer = decimal_value->int_value(); + storage_decimal_value->fraction = decimal_value->frac_value(); + break; + } + case OLAP_FIELD_TYPE_DATETIME: { + dst->set_is_null(src_null); + if (src_null) { break; } - case OLAP_FIELD_TYPE_DATE: { - dst->set_is_null(src_null); - if (src_null) { - break; - } + auto* datetime_value = reinterpret_cast(src); + auto* storage_datetime_value = reinterpret_cast(dst->mutable_cell_ptr()); + *storage_datetime_value = datetime_value->to_olap_datetime(); + break; + } - auto* date_value = reinterpret_cast(src); - auto* storage_date_value = reinterpret_cast(dst->mutable_cell_ptr()); - *storage_date_value = static_cast(date_value->to_olap_date()); + case OLAP_FIELD_TYPE_DATE: { + dst->set_is_null(src_null); + if (src_null) { break; } - case OLAP_FIELD_TYPE_BOOL: - case OLAP_FIELD_TYPE_TINYINT: - case OLAP_FIELD_TYPE_SMALLINT: - case OLAP_FIELD_TYPE_INT: - case OLAP_FIELD_TYPE_UNSIGNED_INT: - case OLAP_FIELD_TYPE_BIGINT: - case OLAP_FIELD_TYPE_LARGEINT: - case OLAP_FIELD_TYPE_FLOAT: - case OLAP_FIELD_TYPE_DOUBLE: - case OLAP_FIELD_TYPE_CHAR: - case OLAP_FIELD_TYPE_VARCHAR: - case OLAP_FIELD_TYPE_HLL: - case OLAP_FIELD_TYPE_OBJECT:{ - dst->set_is_null(src_null); - if (src_null) { - break; - } - const TypeInfo* type_info = get_type_info(type); - type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); + + auto* date_value = reinterpret_cast(src); + auto* storage_date_value = reinterpret_cast(dst->mutable_cell_ptr()); + *storage_date_value = static_cast(date_value->to_olap_date()); + break; + } + case OLAP_FIELD_TYPE_BOOL: + case OLAP_FIELD_TYPE_TINYINT: + case OLAP_FIELD_TYPE_SMALLINT: + case OLAP_FIELD_TYPE_INT: + case OLAP_FIELD_TYPE_UNSIGNED_INT: + case OLAP_FIELD_TYPE_BIGINT: + case OLAP_FIELD_TYPE_LARGEINT: + case 
OLAP_FIELD_TYPE_FLOAT: + case OLAP_FIELD_TYPE_DOUBLE: + case OLAP_FIELD_TYPE_CHAR: + case OLAP_FIELD_TYPE_VARCHAR: + case OLAP_FIELD_TYPE_HLL: + case OLAP_FIELD_TYPE_OBJECT: { + dst->set_is_null(src_null); + if (src_null) { break; } - default: - return OLAP_ERR_INVALID_SCHEMA; + const TypeInfo* type_info = get_type_info(type); + type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); + break; + } + default: + return OLAP_ERR_INVALID_SCHEMA; } return OLAP_SUCCESS; @@ -1033,9 +1034,11 @@ OLAPStatus PushBrokerReader::next(ContiguousRow* row) { const void* value = _tuple->get_slot(slot->tuple_offset()); FieldType type = _schema->column(i)->type(); - OLAPStatus field_status = fill_field_row(&cell, (const char*)value, is_null, _mem_pool.get(), type); - if (field_status!= OLAP_SUCCESS) { - LOG(WARNING) << "fill field row failed in spark load, slot index: " << i << ", type: " << type; + OLAPStatus field_status = + fill_field_row(&cell, (const char*)value, is_null, _mem_pool.get(), type); + if (field_status != OLAP_SUCCESS) { + LOG(WARNING) << "fill field row failed in spark load, slot index: " << i + << ", type: " << type; return OLAP_ERR_SCHEMA_SCHEMA_FIELD_INVALID; } } diff --git a/be/src/runtime/buffered_tuple_stream3.cc b/be/src/runtime/buffered_tuple_stream3.cc index 2ea57a7528..325c30a2d9 100644 --- a/be/src/runtime/buffered_tuple_stream3.cc +++ b/be/src/runtime/buffered_tuple_stream3.cc @@ -47,7 +47,7 @@ using BufferHandle = BufferPool::BufferHandle; BufferedTupleStream3::BufferedTupleStream3(RuntimeState* state, const RowDescriptor* row_desc, BufferPool::ClientHandle* buffer_pool_client, - int64_t default_page_len, int64_t max_page_len, + int64_t default_page_len, const std::set& ext_varlen_slots) : state_(state), desc_(row_desc), @@ -70,14 +70,11 @@ BufferedTupleStream3::BufferedTupleStream3(RuntimeState* state, const RowDescrip bytes_pinned_(0), num_rows_(0), default_page_len_(default_page_len), - max_page_len_(max_page_len), 
has_nullable_tuple_(row_desc->is_any_tuple_nullable()), delete_on_read_(false), closed_(false), pinned_(true) { - DCHECK_GE(max_page_len, default_page_len); DCHECK(BitUtil::IsPowerOf2(default_page_len)) << default_page_len; - DCHECK(BitUtil::IsPowerOf2(max_page_len)) << max_page_len; read_page_ = pages_.end(); for (int i = 0; i < desc_->tuple_descriptors().size(); ++i) { const TupleDescriptor* tuple_desc = desc_->tuple_descriptors()[i]; @@ -379,15 +376,6 @@ Status BufferedTupleStream3::NewWritePage(int64_t page_len) noexcept { } Status BufferedTupleStream3::CalcPageLenForRow(int64_t row_size, int64_t* page_len) { - if (UNLIKELY(row_size > max_page_len_)) { - std::stringstream ss; - ss << " execeed max row size, row size:" << PrettyPrinter::print(row_size, TUnit::BYTES) - << " node id:" << node_id_; - //<< " query option max row size:" - //<< PrettyPrinter::print - // (state_->query_options().max_row_size, TUnit::BYTES); - return Status::InternalError(ss.str()); - } *page_len = std::max(default_page_len_, BitUtil::RoundUpToPowerOfTwo(row_size)); return Status::OK(); } diff --git a/be/src/runtime/buffered_tuple_stream3.h b/be/src/runtime/buffered_tuple_stream3.h index 8aaf8f0df7..65b32309f1 100644 --- a/be/src/runtime/buffered_tuple_stream3.h +++ b/be/src/runtime/buffered_tuple_stream3.h @@ -211,7 +211,6 @@ public: /// ext_varlen_slots: set of varlen slots with data stored externally to the stream BufferedTupleStream3(RuntimeState* state, const RowDescriptor* row_desc, BufferPool::ClientHandle* buffer_pool_client, int64_t default_page_len, - int64_t max_page_len, const std::set& ext_varlen_slots = std::set()); virtual ~BufferedTupleStream3(); @@ -502,11 +501,6 @@ private: /// fit in a default-sized page are stored in default-sized page. const int64_t default_page_len_; - /// The maximum length in bytes of pages used to store the stream's rows. 
This is a - /// hard limit on the maximum size of row that can be stored in the stream and the - /// amount of reservation required to read or write to an unpinned stream. - const int64_t max_page_len_; - /// Whether any tuple in the rows is nullable. const bool has_nullable_tuple_; diff --git a/be/src/runtime/row_batch.cpp b/be/src/runtime/row_batch.cpp index 41b303df22..0c76a67187 100644 --- a/be/src/runtime/row_batch.cpp +++ b/be/src/runtime/row_batch.cpp @@ -380,7 +380,7 @@ int RowBatch::serialize(TRowBatch* output_batch) { _row_desc.to_thrift(&output_batch->row_tuples); output_batch->tuple_offsets.reserve(_num_rows * _num_tuples_per_row); - int size = total_byte_size(); + size_t size = total_byte_size(); output_batch->tuple_data.resize(size); // Copy tuple data, including strings, into output_batch (converting string @@ -412,7 +412,7 @@ int RowBatch::serialize(TRowBatch* output_batch) { if (config::compress_rowbatches && size > 0) { // Try compressing tuple_data to _compression_scratch, swap if compressed data is // smaller - int max_compressed_size = snappy::MaxCompressedLength(size); + size_t max_compressed_size = snappy::MaxCompressedLength(size); if (_compression_scratch.size() < max_compressed_size) { _compression_scratch.resize(max_compressed_size); @@ -448,7 +448,7 @@ int RowBatch::serialize(PRowBatch* output_batch) { // is_compressed output_batch->set_is_compressed(false); // tuple data - int size = total_byte_size(); + size_t size = total_byte_size(); auto mutable_tuple_data = output_batch->mutable_tuple_data(); mutable_tuple_data->resize(size); @@ -690,8 +690,8 @@ void RowBatch::deep_copy_to(RowBatch* dst) { dst->commit_rows(_num_rows); } // TODO: consider computing size of batches as they are built up -int RowBatch::total_byte_size() { - int result = 0; +size_t RowBatch::total_byte_size() { + size_t result = 0; // Sum total variable length byte sizes. 
for (int i = 0; i < _num_rows; ++i) { diff --git a/be/src/runtime/row_batch.h b/be/src/runtime/row_batch.h index a3e20f3a97..a25c1a74d8 100644 --- a/be/src/runtime/row_batch.h +++ b/be/src/runtime/row_batch.h @@ -44,7 +44,6 @@ class TupleRow; class TupleDescriptor; class PRowBatch; - // A RowBatch encapsulates a batch of rows, each composed of a number of tuples. // The maximum number of rows is fixed at the time of construction, and the caller // can add rows up to that capacity. @@ -171,7 +170,7 @@ public: // The total size of all data represented in this row batch (tuples and referenced // string data). - int total_byte_size(); + size_t total_byte_size(); TupleRow* get_row(int row_idx) const { DCHECK(_tuple_ptrs != NULL); diff --git a/be/src/runtime/string_value.hpp b/be/src/runtime/string_value.hpp index c18cee74f3..dd29bff24c 100644 --- a/be/src/runtime/string_value.hpp +++ b/be/src/runtime/string_value.hpp @@ -18,8 +18,9 @@ #ifndef DORIS_BE_SRC_QUERY_BE_RUNTIME_STRING_VALUE_INLINE_H #define DORIS_BE_SRC_QUERY_BE_RUNTIME_STRING_VALUE_INLINE_H -#include "runtime/string_value.h" #include + +#include "runtime/string_value.h" #include "util/cpu_info.h" #ifdef __SSE4_2__ #include "util/sse_util.hpp" @@ -37,16 +38,17 @@ namespace doris { // - s1/n1: ptr/len for the first string // - s2/n2: ptr/len for the second string // - len: min(n1, n2) - this can be more cheaply passed in by the caller -static inline int string_compare(const char* s1, int n1, const char* s2, int n2, int len) { +static inline int string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2, + int64_t len) { DCHECK_EQ(len, std::min(n1, n2)); #ifdef __SSE4_2__ if (CpuInfo::is_supported(CpuInfo::SSE4_2)) { while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) { __m128i xmm0 = _mm_loadu_si128(reinterpret_cast(s1)); __m128i xmm1 = _mm_loadu_si128(reinterpret_cast(s2)); - int chars_match = _mm_cmpestri(xmm0, sse_util::CHARS_PER_128_BIT_REGISTER, - xmm1, sse_util::CHARS_PER_128_BIT_REGISTER, - 
sse_util::STRCMP_MODE); + int chars_match = + _mm_cmpestri(xmm0, sse_util::CHARS_PER_128_BIT_REGISTER, xmm1, + sse_util::CHARS_PER_128_BIT_REGISTER, sse_util::STRCMP_MODE); if (chars_match != sse_util::CHARS_PER_128_BIT_REGISTER) { return (unsigned char)s1[chars_match] - (unsigned char)s2[chars_match]; } @@ -57,11 +59,12 @@ static inline int string_compare(const char* s1, int n1, const char* s2, int n2, } #endif - // TODO: for some reason memcmp is way slower than strncmp (2.5x) why? - int result = strncmp(s1, s2, len); - - if (result != 0) { - return result; + unsigned char u1, u2; + while (len-- > 0) { + u1 = (unsigned char)*s1++; + u2 = (unsigned char)*s2++; + if (u1 != u2) return u1 - u2; + if (u1 == '\0') return n1 - n2; } return n1 - n2; @@ -117,6 +120,6 @@ inline StringValue StringValue::trim() const { return StringValue(ptr + begin, end - begin + 1); } -} +} // namespace doris #endif diff --git a/be/src/runtime/string_value_ir.cpp b/be/src/runtime/string_value_ir.cpp index bf43c370b7..a5df929092 100644 --- a/be/src/runtime/string_value_ir.cpp +++ b/be/src/runtime/string_value_ir.cpp @@ -20,7 +20,7 @@ #include "runtime/string_value.hpp" namespace doris { -int ir_string_compare(const char* s1, int n1, const char* s2, int n2) { +int ir_string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2) { return string_compare(s1, n1, s2, n2, std::min(n1, n2)); } } // namespace doris diff --git a/be/src/util/sse2neo.h b/be/src/util/sse2neo.h new file mode 100644 index 0000000000..e69de29bb2 diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index f4e573d425..c48097b722 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -234,7 +234,7 @@ parser code {: // Total keywords of doris terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALIAS, KW_ALL, KW_ALTER, KW_AND, KW_ANTI, KW_APPEND, KW_AS, KW_ASC, KW_AUTHORS, KW_ARRAY, - KW_BACKEND, KW_BACKUP, KW_BETWEEN, 
KW_BEGIN, KW_BIGINT, KW_BINLOG, KW_BITMAP, KW_BITMAP_UNION, KW_BOOLEAN, KW_BROKER, KW_BACKENDS, KW_BY, KW_BUILTIN, + KW_BACKEND, KW_BACKUP, KW_BETWEEN, KW_BEGIN, KW_BIGINT, KW_BINLOG, KW_BITMAP, KW_BITMAP_UNION, KW_BLOB, KW_BOOLEAN, KW_BROKER, KW_BACKENDS, KW_BY, KW_BUILTIN, KW_CANCEL, KW_CASE, KW_CAST, KW_CHAIN, KW_CHAR, KW_CHARSET, KW_CHECK, KW_CLUSTER, KW_CLUSTERS, KW_CLEAN, KW_COLLATE, KW_COLLATION, KW_COLUMN, KW_COLON, KW_COLUMNS, KW_COMMENT, KW_COMMIT, KW_COMMITTED, KW_CONFIG, KW_CONNECTION, KW_CONNECTION_ID, KW_CONSISTENT, KW_CONVERT, KW_COUNT, KW_CREATE, KW_CROSS, KW_CUBE, KW_CURRENT, KW_CURRENT_USER, @@ -265,7 +265,7 @@ terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALIAS, KW_ALL, KW_A KW_SKEW, KW_SMALLINT, KW_SNAPSHOT, KW_SONAME, KW_SPLIT, KW_START, KW_STATUS, KW_STATS, KW_STOP, KW_STORAGE, KW_STREAM, KW_STRING, KW_STRUCT, KW_SUM, KW_SUPERUSER, KW_SYNC, KW_SYSTEM, - KW_TABLE, KW_TABLES, KW_TABLET, KW_TASK, KW_TEMPORARY, KW_TERMINATED, KW_THAN, KW_TIME, KW_THEN, KW_TIMESTAMP, KW_TINYINT,KW_TRASH, + KW_TABLE, KW_TABLES, KW_TABLET, KW_TASK, KW_TEMPORARY, KW_TERMINATED, KW_TEXT, KW_THAN, KW_TIME, KW_THEN, KW_TIMESTAMP, KW_TINYINT,KW_TRASH, KW_TO, KW_TRANSACTION, KW_TRIGGERS, KW_TRIM, KW_TRUE, KW_TRUNCATE, KW_TYPE, KW_TYPES, KW_UNCOMMITTED, KW_UNBOUNDED, KW_UNION, KW_UNIQUE, KW_UNSIGNED, KW_USE, KW_USER, KW_USING, KW_UNINSTALL, KW_VALUE, KW_VALUES, KW_VARCHAR, KW_VARIABLES, KW_VERBOSE, KW_VIEW, @@ -4328,6 +4328,10 @@ type ::= {: RESULT = Type.BITMAP; :} | KW_STRING {: RESULT = ScalarType.createStringType(); :} + | KW_TEXT + {: RESULT = ScalarType.createStringType(); :} + | KW_BLOB + {: RESULT = ScalarType.createStringType(); :} | KW_VARCHAR LPAREN INTEGER_LITERAL:len RPAREN {: ScalarType type = ScalarType.createVarcharType(len.intValue()); type.setAssignedStrLenInColDefinition(); @@ -5145,6 +5149,8 @@ keyword ::= {: RESULT = id; :} | KW_BITMAP_UNION:id {: RESULT = id; :} + | KW_BLOB:id + {: RESULT = id; :} | KW_BOOLEAN:id {: RESULT = id; :} | 
KW_BROKER:id @@ -5365,6 +5371,8 @@ keyword ::= {: RESULT = id; :} | KW_TRANSACTION:id {: RESULT = id; :} + | KW_TEXT:id + {: RESULT = id; :} | KW_TRIGGERS:id {: RESULT = id; :} | KW_TRUNCATE:id diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index 8db63fdff5..a8f4ae701b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -747,8 +747,6 @@ public class SchemaChangeHandler extends AlterHandler { } else { modIndexSchema.add(newColumn); } - - checkRowLength(modIndexSchema); } /* @@ -962,21 +960,6 @@ public class SchemaChangeHandler extends AlterHandler { // value modIndexSchema.add(newColumn); } - - checkRowLength(modIndexSchema); - } - - // row length can not large than limit - private void checkRowLength(List modIndexSchema) throws DdlException { - int rowLengthBytes = 0; - for (Column column : modIndexSchema) { - rowLengthBytes += column.getType().getStorageLayoutBytes(); - } - - if (rowLengthBytes > Config.max_layout_length_per_row) { - throw new DdlException("The size of a row (" + rowLengthBytes + ") exceed the maximal row size: " - + Config.max_layout_length_per_row); - } } private void checkIndexExists(OlapTable olapTable, String targetIndexName) throws DdlException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java index 78fb1550e3..2cb4415970 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java @@ -344,7 +344,6 @@ public class CreateTableStmt extends DdlStmt { && keysDesc.getKeysType() == KeysType.UNIQUE_KEYS) { columnDefs.add(ColumnDef.newDeleteSignColumnDef(AggregateType.REPLACE)); } - int rowLengthBytes = 0; boolean 
hasHll = false; boolean hasBitmap = false; Set columnSet = Sets.newTreeSet(String.CASE_INSENSITIVE_ORDER); @@ -377,13 +376,6 @@ public class CreateTableStmt extends DdlStmt { if (!columnSet.add(columnDef.getName())) { ErrorReport.reportAnalysisException(ErrorCode.ERR_DUP_FIELDNAME, columnDef.getName()); } - - rowLengthBytes += columnDef.getType().getStorageLayoutBytes(); - } - - if (rowLengthBytes > Config.max_layout_length_per_row && engineName.equals("olap")) { - throw new AnalysisException("The size of a row (" + rowLengthBytes + ") exceed the maximal row size: " - + Config.max_layout_length_per_row); } if (hasHll && keysDesc.getKeysType() != KeysType.AGG_KEYS) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java index 67885ce2db..4bb717ec33 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java @@ -85,10 +85,6 @@ public abstract class ColumnType { schemaChangeMatrix[PrimitiveType.DOUBLE.ordinal()][PrimitiveType.VARCHAR.ordinal()] = true; schemaChangeMatrix[PrimitiveType.DOUBLE.ordinal()][PrimitiveType.STRING.ordinal()] = true; - schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.VARCHAR.ordinal()] = true; - schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.CHAR.ordinal()] = true; - schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.STRING.ordinal()] = true; - schemaChangeMatrix[PrimitiveType.VARCHAR.ordinal()][PrimitiveType.TINYINT.ordinal()] = true; schemaChangeMatrix[PrimitiveType.VARCHAR.ordinal()][PrimitiveType.SMALLINT.ordinal()] = true; schemaChangeMatrix[PrimitiveType.VARCHAR.ordinal()][PrimitiveType.INT.ordinal()] = true; @@ -107,9 +103,12 @@ public abstract class ColumnType { schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.FLOAT.ordinal()] = true; 
schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.DOUBLE.ordinal()] = true; schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.DATE.ordinal()] = true; + schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.VARCHAR.ordinal()] = true; + schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.CHAR.ordinal()] = true; schemaChangeMatrix[PrimitiveType.CHAR.ordinal()][PrimitiveType.STRING.ordinal()] = true; schemaChangeMatrix[PrimitiveType.DECIMALV2.ordinal()][PrimitiveType.VARCHAR.ordinal()] = true; + schemaChangeMatrix[PrimitiveType.DECIMALV2.ordinal()][PrimitiveType.STRING.ordinal()] = true; schemaChangeMatrix[PrimitiveType.DATETIME.ordinal()][PrimitiveType.DATE.ordinal()] = true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index c8643c2a82..d318193aa4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -1937,16 +1937,16 @@ public class FunctionSet