// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "olap/schema.h" #include "olap/uint24.h" #include "vec/columns/column_complex.h" #include "vec/columns/column_dictionary.h" #include "vec/columns/column_nullable.h" #include "vec/columns/predicate_column.h" #include "vec/core/types.h" #include "vec/data_types/data_type_factory.hpp" namespace doris { Schema::Schema(const Schema& other) { _copy_from(other); } Schema& Schema::operator=(const Schema& other) { if (this != &other) { _copy_from(other); } return *this; } void Schema::_copy_from(const Schema& other) { _col_ids = other._col_ids; _col_offsets = other._col_offsets; _num_key_columns = other._num_key_columns; _schema_size = other._schema_size; // Deep copy _cols // TODO(lingbin): really need clone? _cols.resize(other._cols.size(), nullptr); for (auto cid : _col_ids) { _cols[cid] = other._cols[cid]->clone(); } } void Schema::_init(const std::vector& cols, const std::vector& col_ids, size_t num_key_columns) { _col_ids = col_ids; _num_key_columns = num_key_columns; _cols.resize(cols.size(), nullptr); _col_offsets.resize(_cols.size(), -1); size_t offset = 0; std::unordered_set col_id_set(col_ids.begin(), col_ids.end()); for (int cid = 0; cid < cols.size(); ++cid) { if (col_id_set.find(cid) == col_id_set.end()) { continue; } _cols[cid] = FieldFactory::create(cols[cid]); _col_offsets[cid] = offset; // Plus 1 byte for null byte offset += _cols[cid]->size() + 1; } _schema_size = offset; } void Schema::_init(const std::vector& cols, const std::vector& col_ids, size_t num_key_columns) { _col_ids = col_ids; _num_key_columns = num_key_columns; _cols.resize(cols.size(), nullptr); _col_offsets.resize(_cols.size(), -1); size_t offset = 0; std::unordered_set col_id_set(col_ids.begin(), col_ids.end()); for (int cid = 0; cid < cols.size(); ++cid) { if (col_id_set.find(cid) == col_id_set.end()) { continue; } // TODO(lingbin): is it necessary to clone Field? each SegmentIterator will // use this func, can we avoid clone? _cols[cid] = cols[cid]->clone(); _col_offsets[cid] = offset; // Plus 1 byte for null byte offset += _cols[cid]->size() + 1; } _schema_size = offset; } Schema::~Schema() { for (auto col : _cols) { delete col; } } vectorized::DataTypePtr Schema::get_data_type_ptr(const Field& field) { return vectorized::DataTypeFactory::instance().create_data_type(field); } vectorized::IColumn::MutablePtr Schema::get_column_by_field(const Field& field) { return get_data_type_ptr(field)->create_column(); } vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& field, bool is_nullable) { vectorized::IColumn::MutablePtr ptr = nullptr; switch (field.type()) { case OLAP_FIELD_TYPE_BOOL: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_TINYINT: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_SMALLINT: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_INT: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_FLOAT: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_DOUBLE: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_BIGINT: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_LARGEINT: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_DATE: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_DATEV2: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_DATETIMEV2: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_DATETIME: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_CHAR: case OLAP_FIELD_TYPE_VARCHAR: case OLAP_FIELD_TYPE_STRING: if (config::enable_low_cardinality_optimize) { ptr = doris::vectorized::ColumnDictionary::create( field.type()); } else { ptr = doris::vectorized::PredicateColumnType::create(); } break; case OLAP_FIELD_TYPE_DECIMAL: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_DECIMAL32: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_DECIMAL64: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_DECIMAL128I: ptr = doris::vectorized::PredicateColumnType::create(); break; case OLAP_FIELD_TYPE_ARRAY: ptr = doris::vectorized::ColumnArray::create( get_predicate_column_ptr(*field.get_sub_field(0)), doris::vectorized::ColumnArray::ColumnOffsets::create()); break; case OLAP_FIELD_TYPE_STRUCT: { size_t field_size = field.get_sub_field_count(); doris::vectorized::MutableColumns columns(field_size); for (size_t i = 0; i < field_size; i++) { columns[i] = get_predicate_column_ptr(*field.get_sub_field(i)); } ptr = doris::vectorized::ColumnStruct::create(std::move(columns)); break; } case OLAP_FIELD_TYPE_MAP: ptr = doris::vectorized::ColumnMap::create( get_predicate_column_ptr(*field.get_sub_field(0)), get_predicate_column_ptr(*field.get_sub_field(1)), doris::vectorized::ColumnArray::ColumnOffsets::create()); break; default: LOG(FATAL) << "Unexpected type when choosing predicate column, type=" << field.type(); } if (field.is_nullable() || is_nullable) { return doris::vectorized::ColumnNullable::create(std::move(ptr), doris::vectorized::ColumnUInt8::create()); } return ptr; } } // namespace doris