// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include "olap/utils.h" #include "runtime/datetime_value.h" #include "runtime/decimalv2_value.h" #include "runtime/descriptors.h" #include "runtime/mem_pool.h" #include "runtime/runtime_state.h" #include "runtime/string_value.h" #include "runtime/tuple.h" #include "text_converter.h" #include "util/binary_cast.hpp" #include "util/string_parser.hpp" #include "util/types.h" #include "vec/runtime/vdatetime_value.h" namespace doris { // Note: this function has a codegen'd version. Changing this function requires // corresponding changes to CodegenWriteSlot. inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, Tuple* tuple, const char* data, int len, bool copy_string, bool need_escape, MemPool* pool) { //Small batch import only \N is considered to be NULL, there is no replace_value function for batch import if (true == slot_desc->is_nullable()) { if (len == 2 && data[0] == '\\' && data[1] == 'N') { tuple->set_null(slot_desc->null_indicator_offset()); return true; } else { tuple->set_not_null(slot_desc->null_indicator_offset()); } } StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; void* slot = tuple->get_slot(slot_desc->tuple_offset()); // Parse the raw-text data. Translate the text string to internal format. switch (slot_desc->type().type) { case TYPE_HLL: case TYPE_VARCHAR: case TYPE_CHAR: case TYPE_STRING: { StringValue* str_slot = reinterpret_cast(slot); str_slot->ptr = const_cast(data); str_slot->len = len; if (len != 0 && (copy_string || need_escape)) { DCHECK(pool != NULL); char* slot_data = reinterpret_cast(pool->allocate(len)); if (need_escape) { unescape_string(data, slot_data, &str_slot->len); } else { memcpy(slot_data, data, str_slot->len); } str_slot->ptr = slot_data; } break; } case TYPE_BOOLEAN: *reinterpret_cast(slot) = StringParser::string_to_bool(data, len, &parse_result); break; case TYPE_TINYINT: *reinterpret_cast(slot) = StringParser::string_to_int(data, len, &parse_result); break; case TYPE_SMALLINT: *reinterpret_cast(slot) = StringParser::string_to_int(data, len, &parse_result); break; case TYPE_INT: *reinterpret_cast(slot) = StringParser::string_to_int(data, len, &parse_result); break; case TYPE_BIGINT: *reinterpret_cast(slot) = StringParser::string_to_int(data, len, &parse_result); break; case TYPE_LARGEINT: { __int128 tmp = StringParser::string_to_int<__int128>(data, len, &parse_result); memcpy(slot, &tmp, sizeof(tmp)); break; } case TYPE_FLOAT: *reinterpret_cast(slot) = StringParser::string_to_float(data, len, &parse_result); break; case TYPE_DOUBLE: *reinterpret_cast(slot) = StringParser::string_to_float(data, len, &parse_result); break; case TYPE_DATE: { DateTimeValue* ts_slot = reinterpret_cast(slot); if (!ts_slot->from_date_str(data, len)) { parse_result = StringParser::PARSE_FAILURE; break; } ts_slot->cast_to_date(); break; } case TYPE_DATETIME: { DateTimeValue* ts_slot = reinterpret_cast(slot); if (!ts_slot->from_date_str(data, len)) { parse_result = StringParser::PARSE_FAILURE; } ts_slot->to_datetime(); break; } case TYPE_DECIMALV2: { DecimalV2Value decimal_slot; if (decimal_slot.parse_from_str(data, len)) { parse_result = StringParser::PARSE_FAILURE; } *reinterpret_cast(slot) = binary_cast(decimal_slot); break; } default: DCHECK(false) << "bad slot type: " << slot_desc->type(); break; } // TODO: add warning for overflow case if (parse_result != StringParser::PARSE_SUCCESS) { tuple->set_null(slot_desc->null_indicator_offset()); return false; } return true; } inline bool TextConverter::write_column(const SlotDescriptor* slot_desc, vectorized::MutableColumnPtr* column_ptr, const char* data, size_t len, bool copy_string, bool need_escape) { vectorized::IColumn* col_ptr = column_ptr->get(); // \N means it's NULL if (true == slot_desc->is_nullable()) { auto* nullable_column = reinterpret_cast(column_ptr->get()); if ((len == 2 && data[0] == '\\' && data[1] == 'N') || len == SQL_NULL_DATA) { nullable_column->insert_data(nullptr, 0); return true; } else { nullable_column->get_null_map_data().push_back(0); col_ptr = &nullable_column->get_nested_column(); } } StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; // Parse the raw-text data. Translate the text string to internal format. switch (slot_desc->type().type) { case TYPE_HLL: { reinterpret_cast(col_ptr)->get_data().emplace_back( HyperLogLog(Slice(data, len))); break; } case TYPE_VARCHAR: case TYPE_CHAR: { if (need_escape) { unescape_string_on_spot(data, &len); } reinterpret_cast(col_ptr)->insert_data(data, len); break; } case TYPE_BOOLEAN: { bool num = StringParser::string_to_bool(data, len, &parse_result); reinterpret_cast*>(col_ptr)->insert_value( (uint8_t)num); break; } case TYPE_TINYINT: { int8_t num = StringParser::string_to_int(data, len, &parse_result); reinterpret_cast*>(col_ptr)->insert_value(num); break; } case TYPE_SMALLINT: { int16_t num = StringParser::string_to_int(data, len, &parse_result); reinterpret_cast*>(col_ptr)->insert_value(num); break; } case TYPE_INT: { int32_t num = StringParser::string_to_int(data, len, &parse_result); reinterpret_cast*>(col_ptr)->insert_value(num); break; } case TYPE_BIGINT: { int64_t num = StringParser::string_to_int(data, len, &parse_result); reinterpret_cast*>(col_ptr)->insert_value(num); break; } case TYPE_LARGEINT: { __int128 num = StringParser::string_to_int<__int128>(data, len, &parse_result); reinterpret_cast*>(col_ptr)->insert_value(num); break; } case TYPE_FLOAT: { float num = StringParser::string_to_float(data, len, &parse_result); reinterpret_cast*>(col_ptr)->insert_value( num); break; } case TYPE_DOUBLE: { double num = StringParser::string_to_float(data, len, &parse_result); reinterpret_cast*>(col_ptr)->insert_value( num); break; } case TYPE_DATE: { vectorized::VecDateTimeValue ts_slot; if (!ts_slot.from_date_str(data, len)) { parse_result = StringParser::PARSE_FAILURE; break; } ts_slot.cast_to_date(); reinterpret_cast*>(col_ptr)->insert_data( reinterpret_cast(&ts_slot), 0); break; } case TYPE_DATETIME: { vectorized::VecDateTimeValue ts_slot; if (!ts_slot.from_date_str(data, len)) { parse_result = StringParser::PARSE_FAILURE; break; } ts_slot.to_datetime(); reinterpret_cast*>(col_ptr)->insert_data( reinterpret_cast(&ts_slot), 0); break; } case TYPE_DECIMALV2: { DecimalV2Value decimal_slot; if (decimal_slot.parse_from_str(data, len)) { parse_result = StringParser::PARSE_FAILURE; break; } PackedInt128 num = binary_cast(decimal_slot); reinterpret_cast*>(col_ptr)->insert_value( num.value); break; } default: DCHECK(false) << "bad slot type: " << slot_desc->type(); break; } if (parse_result == StringParser::PARSE_FAILURE) { if (true == slot_desc->is_nullable()) { auto* nullable_column = reinterpret_cast(column_ptr->get()); size_t size = nullable_column->get_null_map_data().size(); doris::vectorized::NullMap& null_map_data = nullable_column->get_null_map_data(); null_map_data[size - 1] = 1; } else { return false; } } return true; } } // namespace doris