// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "util/tuple_row_zorder_compare.h" namespace doris { RowComparator::RowComparator(Schema* schema) { } int RowComparator::operator()(const char* left, const char* right) const { return -1; } TupleRowZOrderComparator::TupleRowZOrderComparator() { _schema = nullptr; _sort_col_num = 0; } TupleRowZOrderComparator::TupleRowZOrderComparator(int sort_col_num) { _schema = nullptr; _sort_col_num = sort_col_num; } TupleRowZOrderComparator::TupleRowZOrderComparator(Schema* schema, int sort_col_num) :_schema(schema), _sort_col_num(sort_col_num) { _max_col_size = get_type_byte_size(_schema->column(0)->type()); for (size_t i = 1; i < _sort_col_num; ++i) { if (_max_col_size < get_type_byte_size(_schema->column(i)->type())) { _max_col_size = get_type_byte_size(_schema->column(i)->type()); } } } int TupleRowZOrderComparator::compare(const char* lhs, const char* rhs) const { ContiguousRow lhs_row(_schema, lhs); ContiguousRow rhs_row(_schema, rhs); if (_max_col_size <= 4) { return compare_based_on_size(lhs_row, rhs_row); } else if (_max_col_size <= 8) { return compare_based_on_size(lhs_row, rhs_row); } else { return compare_based_on_size(lhs_row, rhs_row); } } void TupleRowZOrderComparator::max_col_size(const RowCursor& rc) { _max_col_size = get_type_byte_size(rc.schema()->column(0)->type()); for (size_t i = 1; i < _sort_col_num; ++i) { if (_max_col_size < get_type_byte_size(rc.schema()->column(i)->type())) { _max_col_size = get_type_byte_size(rc.schema()->column(i)->type()); } } } int TupleRowZOrderComparator::compare_row(const RowCursor& lhs, const RowCursor& rhs) { max_col_size(lhs); if (_max_col_size <= 4) { return compare_based_on_size(lhs, rhs); } else if (_max_col_size <= 8) { return compare_based_on_size(lhs, lhs); } else { return compare_based_on_size(lhs, lhs); } } template int TupleRowZOrderComparator::compare_based_on_size(LhsRowType& lhs, LhsRowType& rhs) const { auto less_msb = [](U x, U y) { return x < y && x < (x ^ y); }; FieldType type = lhs.schema()->column(0)->type(); U msd_lhs = get_shared_representation(lhs.cell(0).is_null() ? nullptr : lhs.cell(0).cell_ptr(), type); U msd_rhs = get_shared_representation(rhs.cell(0).is_null() ? nullptr : rhs.cell(0).cell_ptr(), type); for (int i = 1; i < _sort_col_num; ++i) { type = lhs.schema()->column(i)->type(); const void *lhs_v = lhs.cell(i).is_null() ? nullptr : lhs.cell(i).cell_ptr(); const void *rhs_v = rhs.cell(i).is_null() ? nullptr : rhs.cell(i).cell_ptr(); U lhsi = get_shared_representation(lhs_v, type); U rhsi = get_shared_representation(rhs_v, type); if (less_msb(msd_lhs ^ msd_rhs, lhsi ^ rhsi)) { msd_lhs = lhsi; msd_rhs = rhsi; } } return msd_lhs < msd_rhs ? -1 : (msd_lhs > msd_rhs ? 1 : 0); } template U TupleRowZOrderComparator::get_shared_representation(const void *val, FieldType type) const { // The mask used for setting the sign bit correctly. if (val == NULL) return 0; constexpr U mask = (U) 1 << (sizeof(U) * 8 - 1); switch (type) { case FieldType::OLAP_FIELD_TYPE_NONE: return 0; case FieldType::OLAP_FIELD_TYPE_BOOL: return static_cast(*reinterpret_cast(val)) << (sizeof(U) * 8 - 1); case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT: case FieldType::OLAP_FIELD_TYPE_TINYINT: return get_shared_int_representation( *reinterpret_cast(val), mask); case FieldType::OLAP_FIELD_TYPE_SMALLINT: case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: return get_shared_int_representation( *reinterpret_cast(val), mask); case FieldType::OLAP_FIELD_TYPE_INT: return get_shared_int_representation( *reinterpret_cast(val), mask); case FieldType::OLAP_FIELD_TYPE_DATETIME: case FieldType::OLAP_FIELD_TYPE_DATE: case FieldType::OLAP_FIELD_TYPE_BIGINT: case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT: return get_shared_int_representation( *reinterpret_cast(val), mask); case FieldType::OLAP_FIELD_TYPE_LARGEINT: return static_cast(*reinterpret_cast(val)) ^ mask; case FieldType::OLAP_FIELD_TYPE_FLOAT: return get_shared_float_representation(val, mask); case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE: case FieldType::OLAP_FIELD_TYPE_DOUBLE: return get_shared_float_representation(val, mask); case FieldType::OLAP_FIELD_TYPE_CHAR: case FieldType::OLAP_FIELD_TYPE_VARCHAR:{ const StringValue *string_value = reinterpret_cast(val); return get_shared_string_representation(string_value->ptr, string_value->len); } case FieldType::OLAP_FIELD_TYPE_DECIMAL: { decimal12_t decimal_val = *reinterpret_cast(val); int128_t value = decimal_val.integer*DecimalV2Value::ONE_BILLION + decimal_val.fraction; return static_cast(value) ^ mask; } default: return 0; } } template U inline TupleRowZOrderComparator::get_shared_int_representation(const T val, U mask) const { uint64_t shift_size = static_cast( std::max(static_cast((sizeof(U) - sizeof(T)) * 8), (int64_t) 0)); return (static_cast(val) << shift_size) ^ mask; } template U inline TupleRowZOrderComparator::get_shared_float_representation(const void *val, U mask) const { int64_t tmp; T floating_value = *reinterpret_cast(val); memcpy(&tmp, &floating_value, sizeof(T)); if (UNLIKELY(std::isnan(floating_value))) return 0; // "int" is enough because U and T are only primitive type int s = (int)((sizeof(U) - sizeof(T)) * 8); if (floating_value < 0.0) { // Flipping all bits for negative values. return static_cast(~tmp) << std::max(s, 0); } else { // Flipping only first bit. return (static_cast(tmp) << std::max(s, 0)) ^ mask; } } template U inline TupleRowZOrderComparator::get_shared_string_representation(const char *char_ptr, int length) const { int len = length < sizeof(U) ? length : sizeof(U); if (len == 0) return 0; U dst = 0; // We copy the bytes from the string but swap the bytes because of integer endianness. BitUtil::ByteSwapScalar(&dst, char_ptr, len); return dst << ((sizeof(U) - len) * 8); } int TupleRowZOrderComparator::operator()(const char* lhs, const char* rhs) const { int result = compare(lhs, rhs); return result; } int TupleRowZOrderComparator::get_type_byte_size(FieldType type) const { switch (type) { case FieldType::OLAP_FIELD_TYPE_OBJECT: case FieldType::OLAP_FIELD_TYPE_HLL: case FieldType::OLAP_FIELD_TYPE_STRUCT: case FieldType::OLAP_FIELD_TYPE_ARRAY: case FieldType::OLAP_FIELD_TYPE_MAP: case FieldType::OLAP_FIELD_TYPE_CHAR: case FieldType::OLAP_FIELD_TYPE_VARCHAR: return 0; case FieldType::OLAP_FIELD_TYPE_NONE: case FieldType::OLAP_FIELD_TYPE_BOOL: case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT: case FieldType::OLAP_FIELD_TYPE_TINYINT: return 1; case FieldType::OLAP_FIELD_TYPE_SMALLINT: case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT: return 2; case FieldType::OLAP_FIELD_TYPE_FLOAT: case FieldType::OLAP_FIELD_TYPE_INT: case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT: return 4; case FieldType::OLAP_FIELD_TYPE_DISCRETE_DOUBLE: case FieldType::OLAP_FIELD_TYPE_DOUBLE: case FieldType::OLAP_FIELD_TYPE_BIGINT: case FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT: return 8; case FieldType::OLAP_FIELD_TYPE_DECIMAL: case FieldType::OLAP_FIELD_TYPE_LARGEINT: case FieldType::OLAP_FIELD_TYPE_DATETIME: case FieldType::OLAP_FIELD_TYPE_DATE: return 16; case FieldType::OLAP_FIELD_TYPE_UNKNOWN: DCHECK(false); break; default: DCHECK(false); } return -1; } }