/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #ifndef OCEANBASE_SHARE_AGGREGATE_MIN_MAX_H_ #define OCEANBASE_SHARE_AGGREGATE_MIN_MAX_H_ #include "share/aggregate/iaggregate.h" #include namespace oceanbase { namespace share { namespace aggregate { using namespace sql; struct CmpCalcInfo { CmpCalcInfo(ObObjMeta obj_meta, int16_t cell_len) : obj_meta_(obj_meta), agg_cell_len_(cell_len), calculated_(0) {} CmpCalcInfo() : obj_meta_(), agg_cell_len_(0), calculated_(0) {} operator int64_t() const { return flags_; } inline void set_calculated() { calculated_ = static_cast(1); } inline bool calculated() const { return calculated_ == 1; } union { struct { ObObjMeta obj_meta_; int16_t agg_cell_len_; // for fixed length type only int16_t calculated_; }; int64_t flags_; }; }; static_assert(sizeof(CmpCalcInfo) == sizeof(int64_t), ""); // fixed length type // exampl of min/max cell in aggr_row: // min(int64) // -------------------- // ... | int64 |... // -------------------- // // variable length type // example of min/max cell in aggr_row // max(str) // ------------------------------------------- // ...| , |... // ------------------------------------------- // second pair of is used to store tmp result of aggregate template class MinMaxAggregate final : public BatchAggregateWrapper> { using buf_node = std::pair; static const int32_t BUF_BLOCK_SIZE = 512; public: static const constexpr VecValueTypeClass IN_TC = vec_tc; static const constexpr VecValueTypeClass OUT_TC = vec_tc; public: // TODO: remove info for window function optimization public: MinMaxAggregate() {} int init(RuntimeContext &agg_ctx, const int64_t agg_col_id, ObIAllocator &allocator) override { UNUSEDx(agg_col_id, allocator); int ret = OB_SUCCESS; return ret; } int add_one_row(RuntimeContext &agg_ctx, int64_t row_num, int64_t batch_size, const bool is_null, const char *data, const int32_t data_len, int32_t agg_col_idx, char *agg_cell) override { int ret = OB_SUCCESS; NotNullBitVector ¬_nulls = agg_ctx.locate_notnulls_bitmap(agg_col_idx, agg_cell); if (!is_null) { int cmp_ret = 0; ObObjMeta obj_meta; if (not_nulls.at(agg_col_idx)) { if (VecTCCmpCalc::cmp == BasicCmpCalc::cmp || vec_tc == VEC_TC_FLOAT || vec_tc == VEC_TC_DOUBLE || vec_tc == VEC_TC_NUMBER || vec_tc == VEC_TC_TIMESTAMP_TINY || vec_tc == VEC_TC_TIMESTAMP_TZ || vec_tc == VEC_TC_INTERVAL_DS) { // no need for obj meta } else { obj_meta = agg_ctx.aggr_infos_.at(agg_col_idx).param_exprs_.at(0)->obj_meta_; } if (!helper::is_var_len_agg_cell(vec_tc)) { ret = VecTCCmpCalc::cmp( obj_meta, obj_meta, agg_cell, VEC_TC_NUMBER == vec_tc ? number::ObNumber::MAX_CALC_BYTE_LEN : sizeof(RTCType), data, data_len, cmp_ret); } else { int32_t agg_cell_len = *reinterpret_cast(agg_cell + sizeof(char *)); const char *agg_data = reinterpret_cast(*reinterpret_cast(agg_cell)); ret = VecTCCmpCalc::cmp(obj_meta, obj_meta, agg_data, agg_cell_len, data, data_len, cmp_ret); } if ((is_min && cmp_ret > 0) || (!is_min && cmp_ret < 0)) { if (!helper::is_var_len_agg_cell(vec_tc)) { MEMCPY(agg_cell, data, data_len); } else { *reinterpret_cast(agg_cell) = reinterpret_cast(data); *reinterpret_cast(agg_cell + sizeof(char *)) = data_len; if (OB_FAIL(set_tmp_var_agg_data(agg_ctx, agg_col_idx, agg_cell))) { SQL_LOG(WARN, "set var aggregate data failed", K(ret)); } } } } else if (helper::is_var_len_agg_cell(vec_tc)) { *reinterpret_cast(agg_cell) = reinterpret_cast(data); *reinterpret_cast(agg_cell + sizeof(char *)) = data_len; if (OB_FAIL(set_tmp_var_agg_data(agg_ctx, agg_col_idx, agg_cell))) { SQL_LOG(WARN, "set agg data failed", K(ret)); } } else { MEMCPY(agg_cell, data, data_len); } not_nulls.set(agg_col_idx); } return ret; } template int add_row(RuntimeContext &agg_ctx, ColumnFmt &columns, const int64_t row_num, const int32_t agg_col_id, char *agg_cell, void *tmp_res, int64_t &calc_info) { UNUSED(tmp_res); int ret = OB_SUCCESS; const char *row_data = nullptr; int32_t row_len = 0; int cmp_ret = 0; columns.get_payload(row_num, row_data, row_len); CmpCalcInfo &cmp_info = reinterpret_cast(calc_info); if (!helper::is_var_len_agg_cell(vec_tc)) { if (cmp_info.calculated()) { // do not need to read not_nulls bitmap for fixed length (include ObNUmber) types ret = VecTCCmpCalc::cmp(cmp_info.obj_meta_, cmp_info.obj_meta_, agg_cell, cmp_info.agg_cell_len_, row_data, row_len, cmp_ret); if (OB_FAIL(ret)) { SQL_LOG(WARN, "compare failed", K(ret)); } else if ((is_min && cmp_ret > 0) || (!is_min && cmp_ret < 0)) { MEMCPY(agg_cell, row_data, row_len); } } else { MEMCPY(agg_cell, row_data, row_len); cmp_info.set_calculated(); } } else { int32_t agg_cell_len = *reinterpret_cast(agg_cell + sizeof(char *)); const char *agg_data = reinterpret_cast(*reinterpret_cast(agg_cell)); if (cmp_info.calculated()) { ret = VecTCCmpCalc::cmp(cmp_info.obj_meta_, cmp_info.obj_meta_, agg_data, agg_cell_len, row_data, row_len, cmp_ret); if (OB_FAIL(ret)) { SQL_LOG(WARN, "compare failed", K(ret)); } else if ((is_min && cmp_ret > 0) || (!is_min && cmp_ret < 0)) { *reinterpret_cast(agg_cell) = reinterpret_cast(row_data); *reinterpret_cast(agg_cell + sizeof(char *)) = row_len; } } else { *reinterpret_cast(agg_cell) = reinterpret_cast(row_data); *reinterpret_cast(agg_cell + sizeof(char *)) = row_len; cmp_info.set_calculated(); } } return ret; } template int add_nullable_row(RuntimeContext &agg_ctx, ColumnFmt &columns, const int32_t row_num, const int32_t agg_col_id, char *agg_cell, void *tmp_res, int64_t &calc_info) { int ret = OB_SUCCESS; if (columns.is_null(row_num)) { // do nothing SQL_LOG(DEBUG, "add null row", K(is_min), K(agg_col_id), K(row_num)); } else if (OB_FAIL( add_row(agg_ctx, columns, row_num, agg_col_id, agg_cell, tmp_res, calc_info))) { SQL_LOG(WARN, "add row failed", K(ret)); } else { NotNullBitVector ¬_nulls = agg_ctx.locate_notnulls_bitmap(agg_col_id, agg_cell); not_nulls.set(agg_col_id); } return ret; } template int collect_group_result(RuntimeContext &agg_ctx, const sql::ObExpr &agg_expr, const int32_t agg_col_id, const char *agg_cell, const int32_t agg_cell_len) { int ret = OB_SUCCESS; ObEvalCtx &ctx = agg_ctx.eval_ctx_; int64_t output_idx = ctx.get_batch_idx(); ColumnFmt *res_vec = static_cast(agg_expr.get_vector(ctx)); const NotNullBitVector ¬_nulls = agg_ctx.locate_notnulls_bitmap(agg_col_id, agg_cell); if (OB_LIKELY(not_nulls.at(agg_col_id))) { if (helper::is_var_len_agg_cell(vec_tc)) { char *res_buf = agg_expr.get_str_res_mem(ctx, agg_cell_len); if (OB_ISNULL(res_buf)) { ret = OB_ALLOCATE_MEMORY_FAILED; SQL_LOG(WARN, "allocate memory failed", K(ret)); } else { const char *data = reinterpret_cast(*reinterpret_cast(agg_cell)); CellWriter>::set(data, agg_cell_len, res_vec, output_idx, res_buf); } } else { CellWriter>::set(agg_cell, agg_cell_len, res_vec, output_idx, nullptr); } } else { res_vec->set_null(output_idx); } return ret; } int collect_tmp_result(RuntimeContext &agg_ctx, const int32_t agg_col_id, char *agg_cell) { int ret = OB_SUCCESS; NotNullBitVector ¬_nulls = agg_ctx.locate_notnulls_bitmap(agg_col_id, agg_cell); // if result value has variable result length, e.g. string value, // we need copy value to tmp buffer in case value ptr stored was changed after next batch loop. if (not_nulls.at(agg_col_id) && helper::is_var_len_agg_cell(vec_tc)) { if (OB_FAIL(set_tmp_var_agg_data(agg_ctx, agg_col_id, agg_cell))) { SQL_LOG(WARN, "set variable aggregate data failed", K(ret)); } } return ret; } void reuse() override { } void destroy() override { } inline int64_t get_batch_calc_info(RuntimeContext &agg_ctx, int32_t agg_col_idx, char *agg_cell) override { ObObjMeta &obj_meta = agg_ctx.locate_aggr_info(agg_col_idx).param_exprs_.at(0)->obj_meta_; NotNullBitVector ¬_nulls = agg_ctx.locate_notnulls_bitmap(agg_col_idx, agg_cell); int32_t agg_cell_len = 0; if (!helper::is_var_len_agg_cell(vec_tc)) { agg_cell_len = agg_ctx.row_meta().get_cell_len(agg_col_idx, nullptr/*not used*/); } CmpCalcInfo info = CmpCalcInfo(obj_meta, static_cast(agg_cell_len)); if (not_nulls.at(agg_col_idx)) { info.set_calculated(); } return info; } TO_STRING_KV("aggregate", (is_min ? "min" : "max"), K(vec_tc)); private: int set_tmp_var_agg_data(RuntimeContext &agg_ctx, const int32_t agg_col_id, char *agg_cell) { int ret = OB_SUCCESS; if (helper::is_var_len_agg_cell(vec_tc)) { char *agg_data = reinterpret_cast(*reinterpret_cast(agg_cell)); int32_t agg_data_len = *reinterpret_cast(agg_cell + sizeof(char *)); char *tmp_buf = reinterpret_cast(*reinterpret_cast(agg_cell + sizeof(char *) + sizeof(int32_t))); int32_t &cap = *reinterpret_cast(agg_cell + sizeof(int32_t) + sizeof(char *) * 2); if (cap < agg_data_len) { int32_t new_cap = 2 *((agg_data_len + BUF_BLOCK_SIZE - 1) / BUF_BLOCK_SIZE) * BUF_BLOCK_SIZE; void *new_buf = nullptr; if (OB_ISNULL(new_buf = agg_ctx.allocator_.alloc(new_cap))) { ret = OB_ALLOCATE_MEMORY_FAILED; SQL_LOG(WARN, "allocate memory failed", K(ret)); } else { tmp_buf = (char *)new_buf; cap = new_cap; *reinterpret_cast(agg_cell + sizeof(char *) + sizeof(int32_t)) = reinterpret_cast(tmp_buf); } } if (OB_SUCC(ret)) { MEMCPY(tmp_buf, agg_data, agg_data_len); *reinterpret_cast(agg_cell) = reinterpret_cast(tmp_buf); } } return ret; } }; } // end namespace aggregate } // end namespace share } // end namespace oceanbase #endif // OCEANBASE_SHARE_AGGREGATE_MIN_MAX_H_