From 5cbd86c9e6eb6b6d5b319af37a14c89c321d8a8e Mon Sep 17 00:00:00 2001 From: wanghangQ <493666756@qq.com> Date: Thu, 29 Aug 2024 07:52:21 +0000 Subject: [PATCH] [FEAT MERGE] Vector Index & Array Type Co-authored-by: helloamateur Co-authored-by: skylhd Co-authored-by: JLY2015 <1623359870@qq.com> --- deps/init/oceanbase.el7.aarch64.deps | 1 + deps/init/oceanbase.el7.x86_64.deps | 1 + deps/init/oceanbase.el8.aarch64.deps | 1 + deps/init/oceanbase.el8.x86_64.deps | 1 + deps/init/oceanbase.el9.aarch64.deps | 1 + deps/init/oceanbase.el9.x86_64.deps | 1 + deps/oblib/src/common/cell/ob_cell_reader.cpp | 2 + deps/oblib/src/common/cell/ob_cell_writer.cpp | 3 +- deps/oblib/src/common/ob_accuracy.cpp | 15 +- deps/oblib/src/common/object/ob_obj_funcs.h | 2 +- deps/oblib/src/common/object/ob_obj_type.cpp | 40 +- deps/oblib/src/common/object/ob_obj_type.h | 3 + deps/oblib/src/common/object/ob_object.cpp | 3 +- deps/oblib/src/common/object/ob_object.h | 5 +- deps/oblib/src/lib/CMakeLists.txt | 43 + .../src/lib/mysqlclient/ob_mysql_proxy.h | 13 +- deps/oblib/src/lib/ob_define.h | 8 + deps/oblib/src/lib/ob_name_def.h | 17 + deps/oblib/src/lib/udt/ob_array_type.cpp | 1242 +++++++++ deps/oblib/src/lib/udt/ob_array_type.h | 861 ++++++ deps/oblib/src/lib/udt/ob_collection_type.cpp | 492 ++++ deps/oblib/src/lib/udt/ob_collection_type.h | 123 + deps/oblib/src/lib/vector/ob_vector_util.cpp | 151 ++ deps/oblib/src/lib/vector/ob_vector_util.h | 73 + deps/oblib/src/rpc/obmysql/ob_mysql_global.h | 6 + deps/oblib/src/rpc/obrpc/ob_rpc_packet_list.h | 2 +- mittest/mtlenv/CMakeLists.txt | 1 + mittest/mtlenv/test_vector_index_adaptor.cpp | 1264 +++++++++ .../data_dictionary/ob_data_dict_struct.h | 2 + .../libobcdc/src/ob_log_formatter.cpp | 4 +- .../libobcdc/src/ob_log_meta_manager.cpp | 6 + .../libobcdc/src/ob_log_part_trans_task.cpp | 5 + .../libobcdc/src/ob_log_part_trans_task.h | 1 + .../libobcdc/src/ob_log_schema_cache_info.cpp | 3 +- src/logservice/libobcdc/src/ob_log_utils.cpp | 14 + 
src/logservice/libobcdc/src/ob_log_utils.h | 1 + .../libobcdc/src/ob_obj2str_helper.cpp | 18 +- .../libobcdc/src/ob_obj2str_helper.h | 6 + .../tests/ob_binlog_record_printer.cpp | 10 +- src/observer/CMakeLists.txt | 1 + src/observer/mysql/obsm_utils.cpp | 7 +- src/observer/ob_rpc_processor_simple.cpp | 14 + src/observer/ob_rpc_processor_simple.h | 1 + src/observer/ob_server.cpp | 7 + src/observer/ob_service.cpp | 75 + src/observer/ob_service.h | 1 + src/observer/ob_srv_xlator_rootserver.cpp | 2 +- src/observer/omt/ob_multi_tenant.cpp | 2 + .../table/ttl/ob_tenant_tablet_ttl_mgr.cpp | 14 +- .../table/ttl/ob_tenant_tablet_ttl_mgr.h | 4 + .../ob_all_virtual_vector_index_info.cpp | 243 ++ .../ob_all_virtual_vector_index_info.h | 104 + .../ob_information_columns_table.cpp | 14 +- .../ob_information_parameters_table.cpp | 6 +- .../virtual_table/ob_mysql_proc_table.cpp | 3 +- .../virtual_table/ob_table_columns.cpp | 15 + src/observer/virtual_table/ob_table_index.cpp | 224 +- src/observer/virtual_table/ob_table_index.h | 7 + .../ob_virtual_table_iterator_factory.cpp | 12 + src/pl/CMakeLists.txt | 1 + src/pl/ob_pl_interface_pragma.h | 12 + src/pl/ob_pl_package_manager.cpp | 3 +- src/pl/sys_package/ob_dbms_vector_mysql.cpp | 110 + src/pl/sys_package/ob_dbms_vector_mysql.h | 38 + src/rootserver/CMakeLists.txt | 3 + .../ddl_task/ob_ddl_redefinition_task.cpp | 289 +- .../ddl_task/ob_ddl_redefinition_task.h | 2 - src/rootserver/ddl_task/ob_ddl_scheduler.cpp | 351 +++ src/rootserver/ddl_task/ob_ddl_scheduler.h | 42 + .../ob_ddl_single_replica_executor.cpp | 6 +- .../ddl_task/ob_ddl_single_replica_executor.h | 2 +- src/rootserver/ddl_task/ob_ddl_task.cpp | 166 +- src/rootserver/ddl_task/ob_ddl_task.h | 48 +- .../ddl_task/ob_drop_vec_index_task.cpp | 899 +++++++ .../ddl_task/ob_drop_vec_index_task.h | 126 + .../ddl_task/ob_fts_index_build_task.cpp | 498 ++-- .../ddl_task/ob_fts_index_build_task.h | 40 +- .../ddl_task/ob_index_build_task.cpp | 15 +- 
.../ddl_task/ob_rebuild_index_task.cpp | 754 ++++++ .../ddl_task/ob_rebuild_index_task.h | 99 + .../ddl_task/ob_vec_index_build_task.cpp | 1934 ++++++++++++++ .../ddl_task/ob_vec_index_build_task.h | 192 ++ src/rootserver/ob_ddl_operator.cpp | 113 +- src/rootserver/ob_ddl_operator.h | 3 +- src/rootserver/ob_ddl_service.cpp | 741 +++++- src/rootserver/ob_ddl_service.h | 56 +- src/rootserver/ob_index_builder.cpp | 317 ++- src/rootserver/ob_index_builder.h | 25 +- src/rootserver/ob_root_service.cpp | 46 +- src/rootserver/ob_root_service.h | 7 +- src/rootserver/ob_rs_rpc_processor.h | 2 +- src/share/CMakeLists.txt | 22 +- src/share/aggregate/agg_ctx.h | 1 + src/share/aggregate/first_row.h | 11 +- src/share/aggregate/iaggregate.h | 58 +- src/share/aggregate/single_row.cpp | 9 + src/share/aggregate/sum.cpp | 14 + src/share/aggregate/sum.h | 265 ++ src/share/aggregate/util.h | 1 + src/share/config/ob_config_helper.cpp | 50 +- src/share/config/ob_config_helper.h | 10 + src/share/config/ob_server_config.h | 1 + src/share/datum/ob_datum_cmp_func_def.h | 31 + src/share/datum/ob_datum_funcs.cpp | 137 +- src/share/datum/ob_datum_funcs.h | 1 + .../ob_inner_table_schema.12451_12500.cpp | 324 +++ .../ob_inner_table_schema.15451_15500.cpp | 324 +++ src/share/inner_table/ob_inner_table_schema.h | 21 +- .../ob_inner_table_schema_constants.h | 4 + .../inner_table/ob_inner_table_schema_def.py | 38 +- .../sys_package/dbms_vector_body_mysql.sql | 55 + .../sys_package/dbms_vector_mysql.sql | 78 + src/share/inner_table/table_id_to_name | 3 + src/share/ob_common_rpc_proxy.h | 2 +- src/share/ob_ddl_common.cpp | 366 ++- src/share/ob_ddl_common.h | 43 +- src/share/ob_debug_sync_point.h | 2 + src/share/ob_domain_index_builder_util.cpp | 108 + src/share/ob_domain_index_builder_util.h | 57 + src/share/ob_fts_index_builder_util.cpp | 2 +- src/share/ob_index_builder_util.cpp | 43 +- src/share/ob_lob_access_utils.cpp | 26 + src/share/ob_lob_access_utils.h | 4 +- src/share/ob_rpc_struct.cpp | 137 +- 
src/share/ob_rpc_struct.h | 92 +- src/share/ob_srv_rpc_proxy.h | 1 + src/share/ob_vec_index_builder_util.cpp | 2325 +++++++++++++++++ src/share/ob_vec_index_builder_util.h | 237 ++ src/share/object/ob_obj_cast.cpp | 123 +- src/share/parameter/ob_parameter_seed.ipp | 4 + src/share/rc/ob_tenant_base.h | 2 + src/share/scheduler/ob_dag_scheduler_config.h | 4 + src/share/scheduler/ob_sys_task_stat.cpp | 3 +- src/share/scheduler/ob_sys_task_stat.h | 1 + src/share/scheduler/ob_tenant_dag_scheduler.h | 2 + src/share/schema/ob_column_schema.cpp | 16 + src/share/schema/ob_column_schema.h | 12 +- src/share/schema/ob_schema_getter_guard.cpp | 5 +- src/share/schema/ob_schema_getter_guard.h | 3 +- src/share/schema/ob_schema_mgr.cpp | 10 +- src/share/schema/ob_schema_printer.cpp | 95 +- src/share/schema/ob_schema_printer.h | 6 + src/share/schema/ob_schema_retrieve_utils.ipp | 4 +- src/share/schema/ob_schema_service.cpp | 3 + src/share/schema/ob_schema_struct.h | 47 +- src/share/schema/ob_schema_utils.cpp | 54 +- src/share/schema/ob_schema_utils.h | 7 + src/share/schema/ob_table_dml_param.cpp | 58 +- src/share/schema/ob_table_dml_param.h | 12 + src/share/schema/ob_table_param.cpp | 16 +- src/share/schema/ob_table_param.h | 3 + src/share/schema/ob_table_schema.cpp | 114 +- src/share/schema/ob_table_schema.h | 60 +- src/share/schema/ob_table_sql_service.cpp | 28 +- .../system_variable/ob_sys_var_class_type.h | 1 + .../ob_system_variable_alias.h | 1 + .../ob_system_variable_factory.cpp | 24 + .../ob_system_variable_factory.h | 221 +- .../ob_system_variable_init.cpp | 1907 +++++++------- .../ob_system_variable_init.json | 7 +- src/share/vector/expr_cmp_func.cpp | 121 +- src/share/vector/ob_continuous_vector.cpp | 1 + src/share/vector/ob_discrete_vector.cpp | 1 + src/share/vector/ob_uniform_vector.cpp | 2 + src/share/vector/ob_vector_define.h | 6 + src/share/vector/vector_basic_op.h | 41 +- .../ob_plugin_vector_index_adaptor.cpp | 2097 +++++++++++++++ 
.../ob_plugin_vector_index_adaptor.h | 642 +++++ .../ob_plugin_vector_index_scheduler.cpp | 1471 +++++++++++ .../ob_plugin_vector_index_scheduler.h | 427 +++ .../ob_plugin_vector_index_serialize.cpp | 340 +++ .../ob_plugin_vector_index_serialize.h | 199 ++ .../ob_plugin_vector_index_service.cpp | 1050 ++++++++ .../ob_plugin_vector_index_service.h | 327 +++ .../ob_plugin_vector_index_util.cpp | 244 ++ .../ob_plugin_vector_index_util.h | 99 + .../ob_plugin_vector_index_utils.cpp | 1217 +++++++++ .../ob_plugin_vector_index_utils.h | 145 + .../vector_index/ob_vector_index_util.cpp | 835 ++++++ src/share/vector_index/ob_vector_index_util.h | 139 + .../vector_type/ob_vector_cosine_distance.cpp | 78 + .../vector_type/ob_vector_cosine_distance.h | 38 + .../vector_type/ob_vector_ip_distance.cpp | 36 + src/share/vector_type/ob_vector_ip_distance.h | 36 + .../vector_type/ob_vector_l1_distance.cpp | 41 + src/share/vector_type/ob_vector_l1_distance.h | 35 + .../vector_type/ob_vector_l2_distance.cpp | 55 + src/share/vector_type/ob_vector_l2_distance.h | 37 + src/share/vector_type/ob_vector_norm.cpp | 54 + src/share/vector_type/ob_vector_norm.h | 36 + src/sql/CMakeLists.txt | 15 + src/sql/code_generator/ob_dml_cg_service.cpp | 285 +- src/sql/code_generator/ob_dml_cg_service.h | 26 +- .../code_generator/ob_static_engine_cg.cpp | 21 +- .../ob_static_engine_expr_cg.cpp | 36 + .../code_generator/ob_static_engine_expr_cg.h | 2 + src/sql/code_generator/ob_tsc_cg_service.cpp | 666 ++++- src/sql/code_generator/ob_tsc_cg_service.h | 36 +- src/sql/das/iter/ob_das_iter.cpp | 24 + src/sql/das/iter/ob_das_iter.h | 2 + src/sql/das/iter/ob_das_iter_define.h | 3 + src/sql/das/iter/ob_das_iter_utils.cpp | 65 +- src/sql/das/iter/ob_das_iter_utils.h | 15 + src/sql/das/iter/ob_das_local_lookup_iter.cpp | 1 + src/sql/das/iter/ob_das_merge_iter.cpp | 2 + src/sql/das/iter/ob_das_vid_merge_iter.cpp | 785 ++++++ src/sql/das/iter/ob_das_vid_merge_iter.h | 166 ++ src/sql/das/ob_das_attach_define.cpp | 4 + 
src/sql/das/ob_das_attach_define.h | 23 + src/sql/das/ob_das_def_reg.h | 9 + src/sql/das/ob_das_define.h | 8 +- src/sql/das/ob_das_delete_op.cpp | 11 +- src/sql/das/ob_das_dml_ctx_define.cpp | 5 +- src/sql/das/ob_das_dml_ctx_define.h | 3 +- src/sql/das/ob_das_dml_vec_iter.cpp | 251 ++ src/sql/das/ob_das_dml_vec_iter.h | 71 + src/sql/das/ob_das_domain_utils.cpp | 10 + src/sql/das/ob_das_factory.cpp | 1 + src/sql/das/ob_das_scan_op.cpp | 226 +- src/sql/das/ob_das_scan_op.h | 11 +- src/sql/das/ob_das_update_op.cpp | 11 +- src/sql/das/ob_das_vec_define.cpp | 26 + src/sql/das/ob_das_vec_define.h | 180 ++ src/sql/das/ob_domain_index_lookup_op.cpp | 3 +- src/sql/das/ob_domain_index_lookup_op.h | 4 +- src/sql/das/ob_text_retrieval_op.cpp | 1 - src/sql/das/ob_vector_index_lookup_op.cpp | 1264 +++++++++ src/sql/das/ob_vector_index_lookup_op.h | 182 ++ .../aggregate/ob_aggregate_processor.cpp | 35 +- src/sql/engine/basic/ob_compact_row.cpp | 63 + src/sql/engine/basic/ob_compact_row.h | 63 +- src/sql/engine/basic/ob_expr_values_op.cpp | 6 +- src/sql/engine/basic/ob_temp_column_store.cpp | 227 +- src/sql/engine/basic/ob_temp_column_store.h | 20 +- src/sql/engine/basic/ob_temp_row_store.cpp | 231 +- src/sql/engine/basic/ob_temp_row_store.h | 25 +- .../engine/basic/ob_vector_result_holder.cpp | 180 +- .../engine/basic/ob_vector_result_holder.h | 10 +- src/sql/engine/dml/ob_conflict_checker.cpp | 135 + src/sql/engine/dml/ob_conflict_checker.h | 8 +- src/sql/engine/dml/ob_dml_service.cpp | 4 + src/sql/engine/expr/ob_array_cast.cpp | 556 ++++ src/sql/engine/expr/ob_array_cast.h | 91 + src/sql/engine/expr/ob_array_expr_utils.cpp | 1189 +++++++++ src/sql/engine/expr/ob_array_expr_utils.h | 142 + src/sql/engine/expr/ob_batch_eval_util.cpp | 59 + src/sql/engine/expr/ob_batch_eval_util.h | 201 ++ src/sql/engine/expr/ob_datum_cast.cpp | 207 +- src/sql/engine/expr/ob_expr.cpp | 73 +- src/sql/engine/expr/ob_expr.h | 14 +- src/sql/engine/expr/ob_expr_add.cpp | 138 +- 
src/sql/engine/expr/ob_expr_add.h | 23 +- .../expr/ob_expr_arithmetic_result_type.map | 10 +- src/sql/engine/expr/ob_expr_array.cpp | 251 ++ src/sql/engine/expr/ob_expr_array.h | 53 + .../engine/expr/ob_expr_array_contains.cpp | 497 ++++ src/sql/engine/expr/ob_expr_array_contains.h | 62 + src/sql/engine/expr/ob_expr_cast.cpp | 19 +- src/sql/engine/expr/ob_expr_cmp_func.cpp | 198 +- src/sql/engine/expr/ob_expr_column_conv.cpp | 5 + src/sql/engine/expr/ob_expr_div.cpp | 24 + src/sql/engine/expr/ob_expr_div.h | 2 + .../engine/expr/ob_expr_div_result_type.map | 4 +- .../engine/expr/ob_expr_eval_functions.cpp | 165 +- src/sql/engine/expr/ob_expr_is.cpp | 3 + src/sql/engine/expr/ob_expr_minus.cpp | 145 + src/sql/engine/expr/ob_expr_minus.h | 22 + src/sql/engine/expr/ob_expr_mul.cpp | 29 +- src/sql/engine/expr/ob_expr_mul.h | 3 + src/sql/engine/expr/ob_expr_operator.cpp | 65 + src/sql/engine/expr/ob_expr_operator.h | 7 + .../engine/expr/ob_expr_operator_factory.cpp | 37 + .../expr/ob_expr_relational_cmp_type.map | 10 +- src/sql/engine/expr/ob_expr_res_type.h | 1 + .../engine/expr/ob_expr_result_type_util.cpp | 86 + .../engine/expr/ob_expr_result_type_util.h | 10 + src/sql/engine/expr/ob_expr_sql_udt_utils.cpp | 30 + src/sql/engine/expr/ob_expr_sql_udt_utils.h | 2 + src/sql/engine/expr/ob_expr_vec_data.cpp | 80 + src/sql/engine/expr/ob_expr_vec_data.h | 49 + src/sql/engine/expr/ob_expr_vec_key.cpp | 83 + src/sql/engine/expr/ob_expr_vec_key.h | 49 + src/sql/engine/expr/ob_expr_vec_scn.cpp | 80 + src/sql/engine/expr/ob_expr_vec_scn.h | 50 + src/sql/engine/expr/ob_expr_vec_type.cpp | 83 + src/sql/engine/expr/ob_expr_vec_type.h | 49 + src/sql/engine/expr/ob_expr_vec_vector.cpp | 100 + src/sql/engine/expr/ob_expr_vec_vector.h | 50 + src/sql/engine/expr/ob_expr_vec_vid.cpp | 100 + src/sql/engine/expr/ob_expr_vec_vid.h | 49 + src/sql/engine/expr/ob_expr_vector.cpp | 341 +++ src/sql/engine/expr/ob_expr_vector.h | 201 ++ .../engine/expr/ob_expr_xml_func_helper.cpp | 48 +- 
.../join/hash_join/ob_hash_join_struct.cpp | 18 +- src/sql/engine/join/ob_join_vec_op.cpp | 2 + src/sql/engine/ob_exec_context.cpp | 40 + src/sql/engine/ob_exec_context.h | 7 +- src/sql/engine/ob_operator.cpp | 125 +- src/sql/engine/ob_operator.h | 2 + src/sql/engine/ob_physical_plan_ctx.cpp | 83 + src/sql/engine/ob_physical_plan_ctx.h | 5 + src/sql/engine/ob_serializable_function.h | 6 + src/sql/engine/ob_subschema_ctx.cpp | 259 +- src/sql/engine/ob_subschema_ctx.h | 45 +- .../pdml/static/ob_px_sstable_insert_op.cpp | 21 +- .../pdml/static/ob_px_sstable_insert_op.h | 9 +- .../px/exchange/ob_px_dist_transmit_op.cpp | 2 +- .../engine/px/exchange/ob_px_transmit_op.cpp | 20 + .../engine/px/exchange/ob_px_transmit_op.h | 1 + src/sql/engine/px/ob_px_row_store.cpp | 7 +- .../p2p_datahub/ob_runtime_filter_vec_msg.cpp | 6 +- .../sort/ob_sort_key_fetcher_vec_op.cpp | 5 +- src/sql/engine/sort/ob_sort_vec_op_impl.h | 1 + src/sql/engine/sort/ob_sort_vec_op_impl.ipp | 11 +- .../engine/subquery/ob_subplan_scan_op.cpp | 27 + src/sql/engine/subquery/ob_subplan_scan_op.h | 1 + .../engine/table/ob_odps_table_row_iter.cpp | 4 +- .../engine/table/ob_orc_table_row_iter.cpp | 3 +- .../table/ob_parquet_table_row_iter.cpp | 15 +- src/sql/engine/table/ob_table_scan_op.cpp | 26 + src/sql/engine/table/ob_table_scan_op.h | 2 + src/sql/executor/ob_execute_result.cpp | 1 + src/sql/ob_sql.cpp | 18 +- src/sql/ob_sql_context.cpp | 5 +- src/sql/ob_sql_context.h | 3 +- src/sql/ob_sql_define.h | 3 +- src/sql/ob_sql_utils.cpp | 7 + src/sql/optimizer/ob_del_upd_log_plan.cpp | 2 +- src/sql/optimizer/ob_index_info_cache.h | 4 + src/sql/optimizer/ob_insert_log_plan.cpp | 10 +- src/sql/optimizer/ob_join_order.cpp | 93 +- src/sql/optimizer/ob_join_order.h | 8 + src/sql/optimizer/ob_log_del_upd.cpp | 10 +- src/sql/optimizer/ob_log_plan.cpp | 244 +- src/sql/optimizer/ob_log_plan.h | 18 + src/sql/optimizer/ob_log_table_scan.cpp | 449 +++- src/sql/optimizer/ob_log_table_scan.h | 89 +- 
src/sql/optimizer/ob_logical_operator.cpp | 2 +- src/sql/optimizer/ob_opt_est_cost_model.h | 3 + .../parser/non_reserved_keywords_mysql_mode.c | 8 + src/sql/parser/ob_char_type.h | 4 +- src/sql/parser/parse_node.h | 2 + src/sql/parser/sql_parser_base.h | 3 +- src/sql/parser/sql_parser_mysql_mode.l | 2 +- src/sql/parser/sql_parser_mysql_mode.y | 202 +- src/sql/printer/ob_dml_stmt_printer.cpp | 12 + src/sql/printer/ob_dml_stmt_printer.h | 1 + src/sql/printer/ob_raw_expr_printer.cpp | 4 + src/sql/printer/ob_select_stmt_printer.cpp | 2 + .../resolver/ddl/ob_alter_table_resolver.cpp | 18 +- .../resolver/ddl/ob_create_index_resolver.cpp | 63 +- .../resolver/ddl/ob_create_table_resolver.cpp | 91 +- .../resolver/ddl/ob_create_table_resolver.h | 1 + .../resolver/ddl/ob_create_view_resolver.cpp | 10 +- src/sql/resolver/ddl/ob_ddl_resolver.cpp | 457 +++- src/sql/resolver/ddl/ob_ddl_resolver.h | 27 +- .../resolver/dml/ob_default_value_utils.cpp | 21 +- src/sql/resolver/dml/ob_del_upd_resolver.cpp | 176 +- src/sql/resolver/dml/ob_del_upd_resolver.h | 9 + src/sql/resolver/dml/ob_del_upd_stmt.cpp | 3 +- src/sql/resolver/dml/ob_dml_resolver.cpp | 200 +- src/sql/resolver/dml/ob_dml_resolver.h | 10 + src/sql/resolver/dml/ob_dml_stmt.cpp | 26 +- src/sql/resolver/dml/ob_dml_stmt.h | 7 +- src/sql/resolver/dml/ob_insert_resolver.cpp | 11 +- .../dml/ob_multi_table_insert_resolver.cpp | 3 +- src/sql/resolver/dml/ob_select_resolver.cpp | 1 + src/sql/resolver/dml/ob_sql_hint.cpp | 4 +- src/sql/resolver/expr/ob_expr_info_flag.h | 1 + src/sql/resolver/expr/ob_raw_expr.cpp | 21 + src/sql/resolver/expr/ob_raw_expr.h | 20 +- .../resolver/expr/ob_raw_expr_deduce_type.cpp | 207 +- .../resolver/expr/ob_raw_expr_deduce_type.h | 5 + .../expr/ob_raw_expr_info_extractor.cpp | 1 + .../expr/ob_raw_expr_resolver_impl.cpp | 89 +- .../resolver/expr/ob_raw_expr_resolver_impl.h | 3 +- src/sql/resolver/expr/ob_raw_expr_util.cpp | 66 +- src/sql/resolver/expr/ob_raw_expr_util.h | 8 + 
src/sql/resolver/ob_resolver_utils.cpp | 129 +- src/sql/resolver/ob_resolver_utils.h | 8 + src/sql/resolver/ob_schema_checker.cpp | 5 +- src/sql/resolver/ob_schema_checker.h | 4 +- src/sql/resolver/ob_stmt_resolver.cpp | 3 +- src/sql/rewrite/ob_transformer_impl.cpp | 35 +- src/sql/rewrite/ob_transformer_impl.h | 1 + src/sql/session/ob_basic_session_info.cpp | 5 + src/sql/session/ob_basic_session_info.h | 1 + src/storage/CMakeLists.txt | 8 + src/storage/access/ob_pushdown_aggregate.cpp | 46 +- src/storage/access/ob_pushdown_aggregate.h | 4 + .../cs_encoding/ob_micro_block_cs_decoder.cpp | 6 +- .../encoding/ob_micro_block_decoder.cpp | 6 +- .../index_block/ob_index_block_util.h | 2 +- .../blocksstable/ob_micro_block_reader.cpp | 4 + .../blocksstable/ob_sstable_printer.cpp | 3 +- src/storage/ddl/ob_build_index_task.cpp | 2 +- src/storage/ddl/ob_ddl_lock.cpp | 79 +- src/storage/ddl/ob_ddl_lock.h | 17 +- .../ddl/ob_delete_lob_meta_row_task.cpp | 488 ++++ src/storage/ddl/ob_delete_lob_meta_row_task.h | 138 + .../ddl/ob_direct_insert_sstable_ctx_new.cpp | 76 +- .../ddl/ob_direct_insert_sstable_ctx_new.h | 6 + src/storage/ddl/ob_direct_load_struct.cpp | 384 ++- src/storage/ddl/ob_direct_load_struct.h | 89 +- src/storage/lob/ob_lob_locator.cpp | 30 +- src/storage/lob/ob_lob_locator.h | 3 +- src/storage/lob/ob_lob_util.cpp | 56 + src/storage/lob/ob_lob_util.h | 7 + src/storage/ls/ob_ls.cpp | 3 + src/storage/ls/ob_ls_tablet_service.cpp | 66 + src/storage/ls/ob_ls_tablet_service.h | 5 + src/storage/ob_storage_schema.h | 1 + src/storage/ob_storage_util.cpp | 13 +- src/storage/ob_storage_util.h | 3 + src/storage/tablelock/ob_table_lock_common.h | 5 + .../tablelock/ob_table_lock_rpc_struct.cpp | 3 +- .../cmd/ob_vector_refresh_index_executor.cpp | 739 ++++++ .../cmd/ob_vector_refresh_index_executor.h | 168 ++ .../vector_index/ob_vector_index_refresh.cpp | 589 +++++ .../vector_index/ob_vector_index_refresh.h | 97 + .../ob_vector_index_sched_job_utils.cpp | 129 + 
.../ob_vector_index_sched_job_utils.h | 70 + .../ob_vector_refresh_idx_transaction.cpp | 241 ++ .../ob_vector_refresh_idx_transaction.h | 67 + .../r/mysql/information_schema.result | 2 + .../r/mysql/all_virtual_data_type.result | 6 +- .../mysql/all_virtual_data_type_class.result | 2 +- .../all_virtual_sys_parameter_stat.result | 1 + .../mysql/desc_virtual_table_in_mysql.result | 26 + .../r/mysql/desc_virtual_table_in_sys.result | 26 + .../r/mysql/inner_table_overall.result | 1 + .../parser/ob_admin_parser_log_entry.cpp | 20 +- .../parser/ob_admin_parser_log_entry.h | 1 + unittest/share/CMakeLists.txt | 2 + unittest/share/test_array_meta.cpp | 486 ++++ .../test_defined_expr_func_by_type.result | 220 +- .../share/test_defined_func_by_type.result | 220 +- .../share/test_vector_index_serialize.cpp | 166 ++ unittest/sql/parser/print_parser_tree.result | 10 +- unittest/sql/parser/test_parser.result | 102 + .../cs_encoding/ob_cs_encoding_test_base.h | 2 +- .../encoding/test_column_decoder.h | 2 +- .../encoding/test_raw_decoder.cpp | 2 +- 443 files changed, 46822 insertions(+), 2893 deletions(-) create mode 100644 deps/oblib/src/lib/udt/ob_array_type.cpp create mode 100644 deps/oblib/src/lib/udt/ob_array_type.h create mode 100644 deps/oblib/src/lib/udt/ob_collection_type.cpp create mode 100644 deps/oblib/src/lib/udt/ob_collection_type.h create mode 100644 deps/oblib/src/lib/vector/ob_vector_util.cpp create mode 100644 deps/oblib/src/lib/vector/ob_vector_util.h create mode 100644 mittest/mtlenv/test_vector_index_adaptor.cpp create mode 100644 src/observer/virtual_table/ob_all_virtual_vector_index_info.cpp create mode 100644 src/observer/virtual_table/ob_all_virtual_vector_index_info.h create mode 100644 src/pl/sys_package/ob_dbms_vector_mysql.cpp create mode 100644 src/pl/sys_package/ob_dbms_vector_mysql.h create mode 100644 src/rootserver/ddl_task/ob_drop_vec_index_task.cpp create mode 100644 src/rootserver/ddl_task/ob_drop_vec_index_task.h create mode 100644 
src/rootserver/ddl_task/ob_rebuild_index_task.cpp create mode 100644 src/rootserver/ddl_task/ob_rebuild_index_task.h create mode 100644 src/rootserver/ddl_task/ob_vec_index_build_task.cpp create mode 100644 src/rootserver/ddl_task/ob_vec_index_build_task.h create mode 100644 src/share/inner_table/sys_package/dbms_vector_body_mysql.sql create mode 100644 src/share/inner_table/sys_package/dbms_vector_mysql.sql create mode 100644 src/share/ob_domain_index_builder_util.cpp create mode 100644 src/share/ob_domain_index_builder_util.h create mode 100644 src/share/ob_vec_index_builder_util.cpp create mode 100644 src/share/ob_vec_index_builder_util.h create mode 100644 src/share/vector_index/ob_plugin_vector_index_adaptor.cpp create mode 100644 src/share/vector_index/ob_plugin_vector_index_adaptor.h create mode 100644 src/share/vector_index/ob_plugin_vector_index_scheduler.cpp create mode 100644 src/share/vector_index/ob_plugin_vector_index_scheduler.h create mode 100644 src/share/vector_index/ob_plugin_vector_index_serialize.cpp create mode 100644 src/share/vector_index/ob_plugin_vector_index_serialize.h create mode 100644 src/share/vector_index/ob_plugin_vector_index_service.cpp create mode 100644 src/share/vector_index/ob_plugin_vector_index_service.h create mode 100644 src/share/vector_index/ob_plugin_vector_index_util.cpp create mode 100644 src/share/vector_index/ob_plugin_vector_index_util.h create mode 100644 src/share/vector_index/ob_plugin_vector_index_utils.cpp create mode 100644 src/share/vector_index/ob_plugin_vector_index_utils.h create mode 100644 src/share/vector_index/ob_vector_index_util.cpp create mode 100644 src/share/vector_index/ob_vector_index_util.h create mode 100644 src/share/vector_type/ob_vector_cosine_distance.cpp create mode 100644 src/share/vector_type/ob_vector_cosine_distance.h create mode 100644 src/share/vector_type/ob_vector_ip_distance.cpp create mode 100644 src/share/vector_type/ob_vector_ip_distance.h create mode 100644 
src/share/vector_type/ob_vector_l1_distance.cpp create mode 100644 src/share/vector_type/ob_vector_l1_distance.h create mode 100644 src/share/vector_type/ob_vector_l2_distance.cpp create mode 100644 src/share/vector_type/ob_vector_l2_distance.h create mode 100644 src/share/vector_type/ob_vector_norm.cpp create mode 100644 src/share/vector_type/ob_vector_norm.h create mode 100644 src/sql/das/iter/ob_das_vid_merge_iter.cpp create mode 100644 src/sql/das/iter/ob_das_vid_merge_iter.h create mode 100644 src/sql/das/ob_das_dml_vec_iter.cpp create mode 100644 src/sql/das/ob_das_dml_vec_iter.h create mode 100644 src/sql/das/ob_das_vec_define.cpp create mode 100644 src/sql/das/ob_das_vec_define.h create mode 100644 src/sql/das/ob_vector_index_lookup_op.cpp create mode 100644 src/sql/das/ob_vector_index_lookup_op.h create mode 100644 src/sql/engine/expr/ob_array_cast.cpp create mode 100644 src/sql/engine/expr/ob_array_cast.h create mode 100644 src/sql/engine/expr/ob_array_expr_utils.cpp create mode 100644 src/sql/engine/expr/ob_array_expr_utils.h create mode 100644 src/sql/engine/expr/ob_expr_array.cpp create mode 100644 src/sql/engine/expr/ob_expr_array.h create mode 100644 src/sql/engine/expr/ob_expr_array_contains.cpp create mode 100644 src/sql/engine/expr/ob_expr_array_contains.h create mode 100644 src/sql/engine/expr/ob_expr_vec_data.cpp create mode 100644 src/sql/engine/expr/ob_expr_vec_data.h create mode 100644 src/sql/engine/expr/ob_expr_vec_key.cpp create mode 100644 src/sql/engine/expr/ob_expr_vec_key.h create mode 100644 src/sql/engine/expr/ob_expr_vec_scn.cpp create mode 100644 src/sql/engine/expr/ob_expr_vec_scn.h create mode 100644 src/sql/engine/expr/ob_expr_vec_type.cpp create mode 100644 src/sql/engine/expr/ob_expr_vec_type.h create mode 100644 src/sql/engine/expr/ob_expr_vec_vector.cpp create mode 100644 src/sql/engine/expr/ob_expr_vec_vector.h create mode 100644 src/sql/engine/expr/ob_expr_vec_vid.cpp create mode 100644 
src/sql/engine/expr/ob_expr_vec_vid.h create mode 100644 src/sql/engine/expr/ob_expr_vector.cpp create mode 100644 src/sql/engine/expr/ob_expr_vector.h create mode 100644 src/storage/ddl/ob_delete_lob_meta_row_task.cpp create mode 100644 src/storage/ddl/ob_delete_lob_meta_row_task.h create mode 100644 src/storage/vector_index/cmd/ob_vector_refresh_index_executor.cpp create mode 100644 src/storage/vector_index/cmd/ob_vector_refresh_index_executor.h create mode 100644 src/storage/vector_index/ob_vector_index_refresh.cpp create mode 100644 src/storage/vector_index/ob_vector_index_refresh.h create mode 100644 src/storage/vector_index/ob_vector_index_sched_job_utils.cpp create mode 100644 src/storage/vector_index/ob_vector_index_sched_job_utils.h create mode 100644 src/storage/vector_index/ob_vector_refresh_idx_transaction.cpp create mode 100644 src/storage/vector_index/ob_vector_refresh_idx_transaction.h create mode 100644 unittest/share/test_array_meta.cpp create mode 100644 unittest/share/test_vector_index_serialize.cpp diff --git a/deps/init/oceanbase.el7.aarch64.deps b/deps/init/oceanbase.el7.aarch64.deps index 51f5062ede..d6558ec484 100644 --- a/deps/init/oceanbase.el7.aarch64.deps +++ b/deps/init/oceanbase.el7.aarch64.deps @@ -34,6 +34,7 @@ devdeps-protobuf-c-1.4.1-100000072023102410.el7.aarch64.rpm devdeps-roaringbitmap-croaring-3.0.0-42024042816.el7.aarch64.rpm devdeps-apache-arrow-9.0.0-302024052920.el7.aarch64.rpm # devdeps-apache-orc-1.8.3-202024072510.el7.aarch64.rpm +devdeps-vsag-1.0.0-102024082710.el7.aarch64.rpm [tools] obdevtools-binutils-2.30-12022100413.el7.aarch64.rpm diff --git a/deps/init/oceanbase.el7.x86_64.deps b/deps/init/oceanbase.el7.x86_64.deps index 5669fc46e0..e83b6afdb2 100644 --- a/deps/init/oceanbase.el7.x86_64.deps +++ b/deps/init/oceanbase.el7.x86_64.deps @@ -39,6 +39,7 @@ devdeps-apache-arrow-9.0.0-222024052223.el7.x86_64.rpm devdeps-hyperscan-5.4.2-152024071714.el7.x86_64.rpm devdeps-apache-orc-1.8.3-202024072510.el7.x86_64.rpm # 
devdeps-apache-orc-1.8.3-202024072510.el7.x86_64.rpm +devdeps-vsag-1.0.0-102024082710.el7.x86_64.rpm [tools] obdevtools-binutils-2.30-12022100413.el7.x86_64.rpm diff --git a/deps/init/oceanbase.el8.aarch64.deps b/deps/init/oceanbase.el8.aarch64.deps index 5943a48eb1..9e903aef63 100644 --- a/deps/init/oceanbase.el8.aarch64.deps +++ b/deps/init/oceanbase.el8.aarch64.deps @@ -34,6 +34,7 @@ devdeps-protobuf-c-1.4.1-100000072023102410.el8.aarch64.rpm devdeps-roaringbitmap-croaring-3.0.0-42024042816.el8.aarch64.rpm devdeps-apache-arrow-9.0.0-322024052923.el8.aarch64.rpm # devdeps-apache-orc-1.8.3-202024072510.el8.aarch64.rpm +devdeps-vsag-1.0.0-102024082710.el8.aarch64.rpm [tools] obdevtools-binutils-2.30-12022100413.el8.aarch64.rpm diff --git a/deps/init/oceanbase.el8.x86_64.deps b/deps/init/oceanbase.el8.x86_64.deps index 89ba8bf992..784d2975e1 100644 --- a/deps/init/oceanbase.el8.x86_64.deps +++ b/deps/init/oceanbase.el8.x86_64.deps @@ -39,6 +39,7 @@ devdeps-hyperscan-5.4.2-152024071714.el8.x86_64.rpm devdeps-apache-orc-1.8.3-202024072510.el8.x86_64.rpm devdeps-odps-cpp-sdk-1.0.0-482024080517.el8.x86_64.rpm # devdeps-apache-orc-1.8.3-202024072510.el8.x86_64.rpm +devdeps-vsag-1.0.0-102024082710.el8.x86_64.rpm [tools] obdevtools-binutils-2.30-12022100413.el8.x86_64.rpm diff --git a/deps/init/oceanbase.el9.aarch64.deps b/deps/init/oceanbase.el9.aarch64.deps index 7ab33f270a..497491f66b 100644 --- a/deps/init/oceanbase.el9.aarch64.deps +++ b/deps/init/oceanbase.el9.aarch64.deps @@ -38,6 +38,7 @@ devdeps-protobuf-c-1.4.1-100000072023102410.el8.aarch64.rpm devdeps-roaringbitmap-croaring-3.0.0-42024042816.el8.aarch64.rpm devdeps-apache-arrow-9.0.0-322024052923.el8.aarch64.rpm # devdeps-apache-orc-1.8.3-202024072510.el8.aarch64.rpm +devdeps-vsag-1.0.0-102024082710.el8.aarch64.rpm [deps-el9] devdeps-apr-1.6.5-232023090616.el9.aarch64.rpm target=el9 diff --git a/deps/init/oceanbase.el9.x86_64.deps b/deps/init/oceanbase.el9.x86_64.deps index b5e055eb1e..a8685faf49 100644 --- 
a/deps/init/oceanbase.el9.x86_64.deps +++ b/deps/init/oceanbase.el9.x86_64.deps @@ -42,6 +42,7 @@ devdeps-roaringbitmap-croaring-3.0.0-42024042816.el8.x86_64.rpm devdeps-hyperscan-5.4.2-152024071714.el8.x86_64.rpm devdeps-apache-orc-1.8.3-202024072510.el8.x86_64.rpm # devdeps-apache-orc-1.8.3-202024072510.el8.x86_64.rpm +devdeps-vsag-1.0.0-102024082710.el8.x86_64.rpm [deps-el9] devdeps-apr-1.6.5-232023090616.el9.x86_64.rpm target=el9 diff --git a/deps/oblib/src/common/cell/ob_cell_reader.cpp b/deps/oblib/src/common/cell/ob_cell_reader.cpp index 2e63442194..c365484e69 100644 --- a/deps/oblib/src/common/cell/ob_cell_reader.cpp +++ b/deps/oblib/src/common/cell/ob_cell_reader.cpp @@ -528,6 +528,7 @@ int ObCellReader::parse(uint64_t *column_id) case ObLongTextType: case ObJsonType: case ObGeometryType: + case ObCollectionSQLType: READ_TEXT(static_cast(meta->type_), obj_); break; case ObBitType: @@ -733,6 +734,7 @@ int ObCellReader::read_cell(common::ObObj &obj) case ObLongTextType: case ObJsonType: case ObGeometryType: + case ObCollectionSQLType: READ_TEXT(static_cast(meta->type_), obj); break; case ObBitType: diff --git a/deps/oblib/src/common/cell/ob_cell_writer.cpp b/deps/oblib/src/common/cell/ob_cell_writer.cpp index 248ec64fbd..f70686594f 100644 --- a/deps/oblib/src/common/cell/ob_cell_writer.cpp +++ b/deps/oblib/src/common/cell/ob_cell_writer.cpp @@ -561,7 +561,8 @@ int ObCellWriter::append(uint64_t column_id, const ObObj &obj, ObObj *clone_obj) case ObMediumTextType: case ObLongTextType: case ObJsonType: - case ObGeometryType: { + case ObGeometryType: + case ObCollectionSQLType: { ret = write_text(obj, obj.get_type(), obj.get_string(), clone_obj); break; } diff --git a/deps/oblib/src/common/ob_accuracy.cpp b/deps/oblib/src/common/ob_accuracy.cpp index 419b529bbb..ae4ee54985 100644 --- a/deps/oblib/src/common/ob_accuracy.cpp +++ b/deps/oblib/src/common/ob_accuracy.cpp @@ -72,11 +72,10 @@ const ObAccuracy ObAccuracy::DDL_DEFAULT_ACCURACY[ObMaxType] = { 
ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // geometry ObAccuracy(), // user defined type in sql ObAccuracy(10, 0), // decimal int - ObAccuracy(), // collection type in sql + ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // collection type in sql ObAccuracy(10, 0), // mysql date. ObAccuracy(19, 6), // mysql datetime. ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // roaringbitmap, roaringbitmap serialized size is possibly to exceed 512M. - }; const ObAccuracy ObAccuracy::DDL_DEFAULT_ACCURACY2[ORACLE_MODE + 1][ObMaxType] = { @@ -132,7 +131,7 @@ const ObAccuracy ObAccuracy::DDL_DEFAULT_ACCURACY2[ORACLE_MODE + 1][ObMaxType] = ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // geometry ObAccuracy(), // user defined type in sql ObAccuracy(10, 0), // decimal int - ObAccuracy(), // collection type in sql + ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // collection type in sql ObAccuracy(10, 0), // mysql date. ObAccuracy(19, 6), // mysql datetime. ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // roaringbitmap @@ -189,7 +188,7 @@ const ObAccuracy ObAccuracy::DDL_DEFAULT_ACCURACY2[ORACLE_MODE + 1][ObMaxType] = ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // geometry ObAccuracy(), // user defined type in sql ObAccuracy(10, 0), // decimal int - ObAccuracy(), // collection type in sql + ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // collection type in sql ObAccuracy(10, 0), // mysql date. ObAccuracy(19, 6), // mysql datetime. ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // roaringbitmap @@ -248,7 +247,7 @@ const ObAccuracy ObAccuracy::MAX_ACCURACY[ObMaxType] = { ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // geometry ObAccuracy(), // user defined type in sql ObAccuracy(OB_MAX_DECIMAL_PRECISION, OB_MAX_DECIMAL_SCALE), // decimal int - ObAccuracy(), // collection type in sql + ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // collection type in sql ObAccuracy(10, 0), // mysql date. ObAccuracy(19, 6), // mysql datetime. 
ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // roaringbitmap @@ -307,7 +306,7 @@ const ObAccuracy ObAccuracy::MAX_ACCURACY2[ORACLE_MODE + 1][ObMaxType] = { ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // geometry ObAccuracy(), // user defined type in sql ObAccuracy(OB_MAX_DECIMAL_PRECISION, OB_MAX_DECIMAL_SCALE), // decimal int - ObAccuracy(), // collection type in sql + ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // collection type in sql ObAccuracy(10, 0), // mysql date. ObAccuracy(19, 6), // mysql datetime. ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // roaringbitmap @@ -364,7 +363,7 @@ const ObAccuracy ObAccuracy::MAX_ACCURACY2[ORACLE_MODE + 1][ObMaxType] = { ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // geometry ObAccuracy(), // user defined type in sql ObAccuracy(OB_MAX_DECIMAL_PRECISION, OB_MAX_DECIMAL_SCALE), // decimal int - ObAccuracy(), // collection type in sql + ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // collection type in sql ObAccuracy(10, 0), // mysql date. ObAccuracy(19, 6), // mysql datetime. ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // roaringbitmap @@ -423,7 +422,7 @@ const ObAccuracy ObAccuracy::DML_DEFAULT_ACCURACY[ObMaxType] = { ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // geometry ObAccuracy(), // user defined type in sql ObAccuracy(), // decimal int - ObAccuracy(), // collection type in sql + ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // collection type in sql ObAccuracy(0, 6), // mysql date. ObAccuracy(0, 0), // mysql datetime. 
ObAccuracy(OB_MAX_LONGTEXT_LENGTH), // roaringbitmap diff --git a/deps/oblib/src/common/object/ob_obj_funcs.h b/deps/oblib/src/common/object/ob_obj_funcs.h index 8533a0a29c..9b23293078 100644 --- a/deps/oblib/src/common/object/ob_obj_funcs.h +++ b/deps/oblib/src/common/object/ob_obj_funcs.h @@ -3524,7 +3524,7 @@ inline int obj_print_json(const ObObj &obj, char *buf, int6 } else { J_OBJ_START(); PRINT_META(); - BUF_PRINTO("COLLECTION"); + BUF_PRINTO("ARRAY"); J_COLON(); BUF_PRINTO(udt_data); J_OBJ_END(); diff --git a/deps/oblib/src/common/object/ob_obj_type.cpp b/deps/oblib/src/common/object/ob_obj_type.cpp index a7c246e341..25800f58ea 100644 --- a/deps/oblib/src/common/object/ob_obj_type.cpp +++ b/deps/oblib/src/common/object/ob_obj_type.cpp @@ -105,7 +105,7 @@ const char *ob_sql_type_str(ObObjType type) "GEOMETRY", "UDT", "DECIMAL_INT", - "COLLECTION", + "ARRAY", "MYSQL_DATE", "MYSQL_DATETIME", "ROARINGBITMAP", @@ -165,7 +165,7 @@ const char *ob_sql_type_str(ObObjType type) "SDO_GEOMETRY", "UDT", "DECIMAL_INT", - "COLLECTION", + "ARRAY", "MYSQL_DATE", "MYSQL_DATETIME", "ROARINGBITMAP", @@ -549,6 +549,21 @@ int ob_udt_sub_type_str(char *buff, return ret; } +int ob_collection_str(const ObObjType &type, const common::ObIArray &type_info, char *buff, int64_t buff_length, int64_t &pos) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!ob_is_collection_sql_type(type)) || type_info.count() < 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected column type", K(ret), K(type), K(type_info.count())); + } else { + ObString cur_str = type_info.at(0); + if (OB_FAIL(databuff_printf(buff, buff_length, pos, "%.*s", cur_str.length(), cur_str.ptr()))) { + LOG_WARN("fail to print array type info", K(ret), K(buff_length), K(pos)); + } + } + return ret; +} + int ob_enum_or_set_str(const ObObjMeta &obj_meta, const common::ObIArray &type_info, char *buff, int64_t buff_length, int64_t &pos) { int ret = OB_SUCCESS; @@ -706,10 +721,11 @@ int ob_sql_type_str_with_coll(char *buff, int64_t 
precision, int64_t scale, ObCollationType coll_type, + const common::ObIArray &type_info, const uint64_t sub_type/* common::ObGeoType::GEOTYPEMAX */) { int ret = OB_SUCCESS; - if (OB_FAIL(ob_sql_type_str(buff, buff_length, pos, type, length, precision, scale, coll_type, sub_type))) { + if (OB_FAIL(ob_sql_type_str(buff, buff_length, pos, type, length, precision, scale, coll_type, type_info, sub_type))) { LOG_WARN("fail to get data type str", K(ret), K(sub_type), K(buff), K(buff_length), K(pos)); } else if (lib::is_mysql_mode() && ob_is_string_type(type) && CS_TYPE_BINARY != coll_type) { if (ObCharset::is_default_collation(coll_type)) { @@ -733,6 +749,7 @@ int ob_sql_type_str(char *buff, int64_t precision, int64_t scale, ObCollationType coll_type, + const common::ObIArray &type_info, const uint64_t sub_type/* common::ObGeoType::GEOTYPEMAX */) { int ret = OB_SUCCESS; @@ -813,6 +830,11 @@ int ob_sql_type_str(char *buff, if (OB_FAIL(ob_udt_sub_type_str(buff, buff_length, pos, dummy_arr, sub_type, true))) { LOG_WARN("fail to get udt sub type str", K(ret), K(sub_type), K(buff), K(buff_length), K(pos)); } + } else if (ob_is_collection_sql_type(type)) { + int64_t pos = 0; + if (OB_FAIL(ob_collection_str(type, type_info, buff, buff_length, pos))) { + LOG_WARN("fail to get enum_or_set str", K(ret), K(type), K(type_info), K(buff_length), K(pos)); + } } else { ret = sql_type_name[OB_LIKELY(type < ObMaxType) ? 
type : ObMaxType](buff, buff_length, pos, length, precision, scale, coll_type); } @@ -823,6 +845,7 @@ int ob_sql_type_str(char *buff, int64_t buff_length, ObObjType type, ObCollationType coll_type, + const common::ObIArray &type_info, const common::ObGeoType geo_type/* common::ObGeoType::GEOTYPEMAX */) { int ret = OB_SUCCESS; @@ -902,6 +925,11 @@ int ob_sql_type_str(char *buff, if (OB_FAIL(ob_geometry_sub_type_str(buff, buff_length, pos, geo_type))) { LOG_WARN("fail to get geometry sub type str", K(ret), K(geo_type), K(buff), K(buff_length), K(pos)); } + } else if (ob_is_collection_sql_type(type)) { + int64_t pos = 0; + if (OB_FAIL(ob_collection_str(type, type_info, buff, buff_length, pos))) { + LOG_WARN("fail to get enum_or_set str", K(ret), K(type), K(type_info), K(buff_length), K(pos)); + } } else if (OB_ISNULL(sql_type_name[type])) { ret = OB_ERR_UNEXPECTED; LOG_WARN("function pointer is NULL", K(type), K(ret)); @@ -939,6 +967,10 @@ int ob_sql_type_str(const ObObjMeta &obj_meta, if (OB_FAIL(ob_udt_sub_type_str(buff, buff_length, pos, type_info, sub_type))) { LOG_WARN("fail to get udt sub type str", K(ret), K(sub_type), K(buff), K(buff_length), K(pos)); } + } else if (obj_meta.is_collection_sql_type()) { + if (OB_FAIL(ob_collection_str(obj_meta.get_type(), type_info, buff, buff_length, pos))) { + LOG_WARN("fail to get enum_or_set str", K(ret), K(obj_meta), K(accuracy), K(buff_length), K(pos)); + } } else { ObObjType datatype = obj_meta.get_type(); ObCollationType coll_type = obj_meta.get_collation_type(); @@ -952,7 +984,7 @@ int ob_sql_type_str(const ObObjMeta &obj_meta, if (OB_FAIL(ob_sql_type_str(buff, buff_length, pos, datatype, length, precision_or_length_semantics, - accuracy.get_scale(), coll_type, sub_type))) { + accuracy.get_scale(), coll_type, type_info, sub_type))) { LOG_WARN("fail to print sql type", K(ret), K(obj_meta), K(accuracy)); } } diff --git a/deps/oblib/src/common/object/ob_obj_type.h b/deps/oblib/src/common/object/ob_obj_type.h index 
512597169d..ede7a464e1 100644 --- a/deps/oblib/src/common/object/ob_obj_type.h +++ b/deps/oblib/src/common/object/ob_obj_type.h @@ -1342,6 +1342,7 @@ int ob_sql_type_str_with_coll(char *buff, int64_t precision, int64_t scale, ObCollationType coll_type, + const common::ObIArray &type_info, const uint64_t sub_type = static_cast(common::ObGeoType::GEOTYPEMAX)); //such as "double(10,7)". with accuracy @@ -1353,6 +1354,7 @@ int ob_sql_type_str(char *buff, int64_t precision, int64_t scale, ObCollationType coll_type, + const common::ObIArray &type_info, const uint64_t sub_type = static_cast(common::ObGeoType::GEOTYPEMAX)); int ob_sql_type_str(const common::ObObjMeta &obj_meta, @@ -1368,6 +1370,7 @@ int ob_sql_type_str(char *buff, int64_t buff_length, ObObjType type, ObCollationType coll_type, + const common::ObIArray &type_info, const common::ObGeoType geo_type = common::ObGeoType::GEOTYPEMAX); // print obj type class string diff --git a/deps/oblib/src/common/object/ob_object.cpp b/deps/oblib/src/common/object/ob_object.cpp index f8575f15fc..731b62c1dd 100644 --- a/deps/oblib/src/common/object/ob_object.cpp +++ b/deps/oblib/src/common/object/ob_object.cpp @@ -1311,7 +1311,8 @@ int ObObj::build_not_strict_default_value(int16_t precision) case ObTextType: case ObMediumTextType: case ObLongTextType: - case ObGeometryType:{ + case ObGeometryType: + case ObCollectionSQLType:{ ObString null_str; set_string(data_type, null_str); meta_.set_inrow(); diff --git a/deps/oblib/src/common/object/ob_object.h b/deps/oblib/src/common/object/ob_object.h index 68023ef49e..488bbf62a1 100644 --- a/deps/oblib/src/common/object/ob_object.h +++ b/deps/oblib/src/common/object/ob_object.h @@ -425,7 +425,10 @@ public: OB_INLINE void set_collation_level(ObCollationLevel cs_level) { cs_level_ = cs_level; } OB_INLINE void set_collation_type(ObCollationType cs_type) { cs_type_ = cs_type; } - OB_INLINE ObCollationType get_collation_type() { return static_cast(cs_type_); } + OB_INLINE ObCollationType 
get_collation_type() { + // ObUserDefinedSQLType reused cs_type as part of sub schema id, therefore always return CS_TYPE_BINARY + return (is_user_defined_sql_type() || is_collection_sql_type()) ? CS_TYPE_BINARY : static_cast(cs_type_); + } OB_INLINE void set_default_collation_type() { set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); } OB_INLINE ObCollationLevel get_collation_level() const { return static_cast(cs_level_); } OB_INLINE ObCollationType get_collation_type() const { diff --git a/deps/oblib/src/lib/CMakeLists.txt b/deps/oblib/src/lib/CMakeLists.txt index 4424571a65..a023ba92c2 100644 --- a/deps/oblib/src/lib/CMakeLists.txt +++ b/deps/oblib/src/lib/CMakeLists.txt @@ -226,6 +226,8 @@ ob_set_subtarget(oblib_lib common_mixed wide_integer/ob_wide_integer_cmp_funcs.cpp wide_integer/ob_wide_integer_str_funcs.cpp udt/ob_udt_type.cpp + udt/ob_collection_type.cpp + udt/ob_array_type.cpp xml/ob_mul_mode_reader.cpp xml/ob_xml.cpp xml/ob_xml_parser.cpp @@ -340,6 +342,10 @@ ob_set_subtarget(oblib_lib vtoa vtoa/ob_vtoa_util.cpp ) +ob_set_subtarget(oblib_lib ob_vector_util + vector/ob_vector_util.cpp +) + ob_lib_add_target(oblib_lib) ob_set_subtarget(oblib_lib_bitmap common @@ -408,3 +414,40 @@ target_link_libraries(oblib_lib ${DEP_DIR}/lib/libxml2.a ${DEP_DIR}/lib/liblzma.a ) + +# vsag vector library +target_link_directories( + oblib_lib PUBLIC ${DEP_DIR}/lib ${DEP_DIR}/lib/vsag_lib +) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") + +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + target_link_libraries(oblib_lib + PUBLIC + libob_vsag_static.a + libcpuinfo.a + libsimd.a + libvsag_static.a + libdiskann.a + libopenblas.a + libgfortran_static.a + libquadmath.a + libgomp_static.a + libroaring.a + ) +elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + target_link_libraries(oblib_lib + PUBLIC + libob_vsag_static.a + libcpuinfo.a + libsimd.a + libvsag_static.a + libdiskann.a + libopenblas.a + libgfortran_static.a + libgomp_static.a + 
libroaring.a + ) +else() + message(FATAL_ERROR "Unsupported architecture: ${CMAKE_SYSTEM_PROCESSOR}") +endif() diff --git a/deps/oblib/src/lib/mysqlclient/ob_mysql_proxy.h b/deps/oblib/src/lib/mysqlclient/ob_mysql_proxy.h index c8962c387a..e886d19dd0 100644 --- a/deps/oblib/src/lib/mysqlclient/ob_mysql_proxy.h +++ b/deps/oblib/src/lib/mysqlclient/ob_mysql_proxy.h @@ -39,7 +39,7 @@ public: ObSessionDDLInfo() : is_ddl_(false), is_source_table_hidden_(false), is_dest_table_hidden_(false), is_heap_table_ddl_(false), is_ddl_check_default_value_bit_(false), is_mview_complete_refresh_(false), is_refreshing_mview_(false), - is_retryable_ddl_(false), reserved_bit_(0) + is_retryable_ddl_(false), is_dummy_ddl_for_inner_visibility_(false), reserved_bit_(0) { } ~ObSessionDDLInfo() = default; @@ -59,6 +59,8 @@ public: bool is_refreshing_mview() const { return is_refreshing_mview_; } void set_retryable_ddl(const bool flag) { is_retryable_ddl_ = flag; } bool is_retryable_ddl() const { return is_retryable_ddl_; } + void set_is_dummy_ddl_for_inner_visibility(const bool flag) { is_dummy_ddl_for_inner_visibility_ = flag; } + bool is_dummy_ddl_for_inner_visibility() const { return is_dummy_ddl_for_inner_visibility_; } inline void reset() { ddl_info_ = 0; } TO_STRING_KV(K_(ddl_info)); OB_UNIS_VERSION(1); @@ -70,7 +72,8 @@ public: static const int64_t IS_MVIEW_COMPLETE_REFRESH_BIT = 1; static const int64_t IS_REFRESHING_MVIEW_BIT = 1; static const int64_t IS_RETRYABLE_DDL_BIT = 1; - static const int64_t RESERVED_BIT = 64 - IS_DDL_BIT - 2 * IS_TABLE_HIDDEN_BIT - IS_HEAP_TABLE_DDL_BIT - IS_DDL_CHECK_DEFAULT_VALUE_BIT - IS_MVIEW_COMPLETE_REFRESH_BIT - IS_REFRESHING_MVIEW_BIT - IS_RETRYABLE_DDL_BIT; + static const int64_t IS_DUMMY_DDL_FOR_INNER_VISIBILITY_BIT = 1; + static const int64_t RESERVED_BIT = 64 - IS_DDL_BIT - 2 * IS_TABLE_HIDDEN_BIT - IS_HEAP_TABLE_DDL_BIT - IS_DDL_CHECK_DEFAULT_VALUE_BIT - IS_MVIEW_COMPLETE_REFRESH_BIT - IS_REFRESHING_MVIEW_BIT - IS_RETRYABLE_DDL_BIT - 
IS_DUMMY_DDL_FOR_INNER_VISIBILITY_BIT; union { uint64_t ddl_info_; struct { @@ -82,6 +85,12 @@ public: uint64_t is_mview_complete_refresh_: IS_MVIEW_COMPLETE_REFRESH_BIT; uint64_t is_refreshing_mview_: IS_REFRESHING_MVIEW_BIT; uint64_t is_retryable_ddl_: IS_RETRYABLE_DDL_BIT; + /** + * If is_dummy_ddl_for_inner_visibility_ is enabled, DML operations on the index table will be allowed. + * Currently only available for vector-index fast refresh feature. + * When is_ddl_ is also enabled, it will override is_dummy_ddl_for_inner_visibility_. + */ + uint64_t is_dummy_ddl_for_inner_visibility_: IS_DUMMY_DDL_FOR_INNER_VISIBILITY_BIT; uint64_t reserved_bit_ : RESERVED_BIT; }; }; diff --git a/deps/oblib/src/lib/ob_define.h b/deps/oblib/src/lib/ob_define.h index 6e16d74b70..baf416b81f 100644 --- a/deps/oblib/src/lib/ob_define.h +++ b/deps/oblib/src/lib/ob_define.h @@ -693,6 +693,14 @@ const char *const OB_PARTITION_SHARDING_NONE = "NONE"; const char *const OB_PARTITION_SHARDING_PARTITION = "PARTITION"; const char *const OB_PARTITION_SHARDING_ADAPTIVE = "ADAPTIVE"; +// vector index search +const char *const OB_VEC_VID_COLUMN_NAME = "__vid"; +const char *const OB_VEC_TYPE_COLUMN_NAME_PREFIX = "__type"; +const char *const OB_VEC_VECTOR_COLUMN_NAME_PREFIX = "__vector"; +const char *const OB_VEC_SCN_COLUMN_NAME_PREFIX = "__scn"; +const char *const OB_VEC_KEY_COLUMN_NAME_PREFIX = "__key"; +const char *const OB_VEC_DATA_COLUMN_NAME_PREFIX = "__data"; + // fulltext search const char *const OB_DOC_ID_COLUMN_NAME = "__doc_id"; const char *const OB_WORD_SEGMENT_COLUMN_NAME_PREFIX = "__word_segment"; diff --git a/deps/oblib/src/lib/ob_name_def.h b/deps/oblib/src/lib/ob_name_def.h index 6b3c344de1..ab0f7c49ec 100644 --- a/deps/oblib/src/lib/ob_name_def.h +++ b/deps/oblib/src/lib/ob_name_def.h @@ -393,6 +393,13 @@ #define N_OR "||" #define N_NOT "!" 
#define N_POW "pow" +#define N_VECTOR_L1_DISTANCE "l1_distance" +#define N_VECTOR_L2_DISTANCE "l2_distance" +#define N_VECTOR_INNER_PRODUCT "inner_product" +#define N_VECTOR_COS_DISTANCE "cosine_distance" +#define N_VECTOR_DIMS "vector_dims" +#define N_VECTOR_NORM "vector_norm" +#define N_VECTOR_DISTANCE "vector_distance" #define N_XOR "^" #define N_ROWEQ "row_eq" #define N_ROWLE "row_le" @@ -794,6 +801,14 @@ #define N_PART_ID "part_id" #define N_INNER_GET "inner_get" #define N_MATCH_AGAINST "match_against" + +#define N_VEC_VID "vec_vid" +#define N_VEC_TYPE "vec_type" +#define N_VEC_VECTOR "vec_vector" +#define N_VEC_SCN "vec_scn" +#define N_VEC_KEY "vec_key" +#define N_VEC_DATA "vec_data" + #define N_DOC_ID "doc_id" #define N_WORD_SEGMENT "word_segment" #define N_WORD_COUNT "word_count" @@ -1096,6 +1111,8 @@ #define N_PRIV_ST_EQUALS "st_equals" #define N_PRIV_ST_TOUCHES "_st_touches" #define N_ALIGN_DATE4CMP "align_date4cmp" +#define N_ARRAY "array" +#define N_ARRAY_CONTAINS "array_contains" // for lock function #define N_GET_LOCK "get_lock" diff --git a/deps/oblib/src/lib/udt/ob_array_type.cpp b/deps/oblib/src/lib/udt/ob_array_type.cpp new file mode 100644 index 0000000000..ec15654362 --- /dev/null +++ b/deps/oblib/src/lib/udt/ob_array_type.cpp @@ -0,0 +1,1242 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX LIB +#include "ob_array_type.h" +#include "lib/ob_errno.h" + +namespace oceanbase { +namespace common { + +#define CONSTRUCT_FIXED_ARRAY_OBJ(Element_Type) \ + void *buf = alloc.alloc(sizeof(ObArrayFixedSize)); \ + if (OB_ISNULL(buf)) { \ + ret = OB_ALLOCATE_MEMORY_FAILED; \ + OB_LOG(WARN, "alloc memory failed", K(ret), K(array_meta.type_id_)); \ + } else { \ + ObArrayFixedSize *arr_ptr = new (buf) ObArrayFixedSize(); \ + if (read_only) { \ + } else if (OB_ISNULL(buf = alloc.alloc(sizeof(ObArrayData)))) { \ + ret = OB_ALLOCATE_MEMORY_FAILED; \ + OB_LOG(WARN, "alloc memory failed", K(ret), K(array_meta.type_id_)); \ + } else { \ + ObArrayData *arr_data = new (buf) ObArrayData(alloc); \ + arr_ptr->set_array_data(arr_data); \ + } \ + if (OB_SUCC(ret)) { \ + arr_obj = arr_ptr; \ + } \ + } + +#define CONSTRUCT_ARRAY_OBJ(Array_Type, Element_Type) \ + void *buf = alloc.alloc(sizeof(Array_Type)); \ + if (OB_ISNULL(buf)) { \ + ret = OB_ALLOCATE_MEMORY_FAILED; \ + OB_LOG(WARN, "alloc memory failed", K(ret), K(array_meta.type_id_)); \ + } else { \ + Array_Type *arr_ptr = new (buf) Array_Type(); \ + if (read_only) { \ + } else if (OB_ISNULL(buf = alloc.alloc(sizeof(ObArrayData)))) { \ + ret = OB_ALLOCATE_MEMORY_FAILED; \ + OB_LOG(WARN, "alloc memory failed", K(ret), K(array_meta.type_id_)); \ + } else { \ + ObArrayData *arr_data = new (buf) ObArrayData(alloc); \ + arr_ptr->set_array_data(arr_data); \ + } \ + if (OB_SUCC(ret)) { \ + arr_obj = arr_ptr; \ + } \ + } + +int ObArrayTypeObjFactory::construct(common::ObIAllocator &alloc, const ObCollectionTypeBase &array_meta, + ObIArrayType *&arr_obj, bool read_only) +{ + int ret = OB_SUCCESS; + if (array_meta.type_id_ == ObNestedType::OB_ARRAY_TYPE) { + const ObCollectionArrayType *arr_type = static_cast(&array_meta); + if (arr_type->element_type_->type_id_ == ObNestedType::OB_BASIC_TYPE) { + ObCollectionBasicType *elem_type = static_cast(arr_type->element_type_); + switch 
(elem_type->basic_meta_.get_obj_type()) { + case ObNullType: { + CONSTRUCT_FIXED_ARRAY_OBJ(int8_t); + break; + } + case ObTinyIntType: { + CONSTRUCT_FIXED_ARRAY_OBJ(int8_t); + break; + } + case ObSmallIntType: { + CONSTRUCT_FIXED_ARRAY_OBJ(int16_t); + break; + } + case ObInt32Type: { + CONSTRUCT_FIXED_ARRAY_OBJ(int32_t); + break; + } + case ObIntType: { + CONSTRUCT_FIXED_ARRAY_OBJ(int64_t); + break; + } + case ObUTinyIntType: { + CONSTRUCT_FIXED_ARRAY_OBJ(uint8_t); + break; + } + case ObUSmallIntType: { + CONSTRUCT_FIXED_ARRAY_OBJ(uint16_t); + break; + } + case ObUInt32Type: { + CONSTRUCT_FIXED_ARRAY_OBJ(uint32_t); + break; + } + case ObUInt64Type: { + CONSTRUCT_FIXED_ARRAY_OBJ(uint64_t); + break; + } + case ObFloatType: { + CONSTRUCT_FIXED_ARRAY_OBJ(float); + break; + } + case ObDoubleType: { + CONSTRUCT_FIXED_ARRAY_OBJ(double); + break; + } + case ObDecimalIntType: { + ObPrecision preci = elem_type->basic_meta_.get_precision(); + if (get_decimalint_type(preci) == DECIMAL_INT_32) { + CONSTRUCT_FIXED_ARRAY_OBJ(int32_t); + } else if (get_decimalint_type(preci) == DECIMAL_INT_64) { + CONSTRUCT_FIXED_ARRAY_OBJ(int64_t); + } else if (get_decimalint_type(preci) == DECIMAL_INT_128) { + CONSTRUCT_FIXED_ARRAY_OBJ(int128_t); + } else if (get_decimalint_type(preci) == DECIMAL_INT_256) { + CONSTRUCT_FIXED_ARRAY_OBJ(int256_t); + } else if (get_decimalint_type(preci) == DECIMAL_INT_512) { + CONSTRUCT_FIXED_ARRAY_OBJ(int512_t); + } else { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected precision", K(ret), K(preci)); + } + if (OB_SUCC(ret)) { + arr_obj->set_scale(elem_type->basic_meta_.get_scale()); + } + break; + } + case ObVarcharType : { + CONSTRUCT_ARRAY_OBJ(ObArrayBinary, char); + break; + } + default: { + ret = OB_NOT_SUPPORTED; + OB_LOG(WARN, "unsupported type", K(ret), K(elem_type->basic_meta_.get_obj_type())); + } + } + if (OB_SUCC(ret)) { + arr_obj->set_element_type(static_cast(elem_type->basic_meta_.get_obj_type())); + } + } else if (array_meta.type_id_ == 
ObNestedType::OB_ARRAY_TYPE) { + CONSTRUCT_ARRAY_OBJ(ObArrayNested, char); + ObIArrayType *arr_child = NULL; + if (FAILEDx(construct(alloc, *arr_type->element_type_, arr_child, read_only))) { + OB_LOG(WARN, "failed to construct child element", K(ret), K(array_meta.type_id_)); + } else { + arr_obj->set_element_type(static_cast(ObCollectionSQLType)); + ObArrayNested *nested_arr = static_cast(arr_obj); + nested_arr->set_child_array(arr_child); + } + } + } else if (array_meta.type_id_ == ObNestedType::OB_VECTOR_TYPE) { + CONSTRUCT_ARRAY_OBJ(ObVectorData, float); + if (OB_SUCC(ret)) { + arr_obj->set_element_type(static_cast(ObFloatType)); + } + } else { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected collect info type", K(ret), K(array_meta.type_id_)); + } + return ret; +} + +int ObArrayUtil::get_type_name(const ObDataType &elem_type, char *buf, int buf_len, uint32_t depth) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + for (uint32_t i = 0; OB_SUCC(ret) && i < depth; i++) { + if (OB_FAIL(databuff_printf(buf, buf_len, pos, "ARRAY("))) { + LOG_WARN("failed to convert len to string", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "%s", ob_sql_type_str(elem_type.get_obj_type())))) { + LOG_WARN("failed to convert len to string", K(ret)); + } else if (elem_type.get_obj_type() == ObDecimalIntType + && OB_FAIL(databuff_printf(buf, buf_len, pos, "(%d,%d)", elem_type.get_precision(), elem_type.get_scale()))) { + LOG_WARN("failed to add deciaml precision to string", K(ret)); + } else if (ob_is_string_tc(elem_type.get_obj_type()) + && OB_FAIL(databuff_printf(buf, buf_len, pos, "(%d)", elem_type.get_length()))) { + LOG_WARN("failed to add string len to string", K(ret)); + } + for (uint32_t i = 0; OB_SUCC(ret) && i < depth; i++) { + if (OB_FAIL(databuff_printf(buf, buf_len, pos, ")"))) { + LOG_WARN("failed to add ) to string", K(ret)); + } + } + return ret; +} + +int ObArrayUtil::push_back_decimal_int(const ObPrecision prec, 
const ObDecimalInt *dec_val, bool is_null, ObIArrayType *arr_obj) +{ + int ret = OB_SUCCESS; + if (get_decimalint_type(prec) == DECIMAL_INT_32) { + ObArrayFixedSize *arr = static_cast *>(arr_obj); + if (is_null) { + if (OB_FAIL(arr->push_back(0, true))) { + LOG_WARN("failed to push back null value", K(ret)); + } + } else if (OB_FAIL(arr->push_back(dec_val->int32_v_[0]))) { + LOG_WARN("failed to push back decimal int32 value", K(ret), K(dec_val->int32_v_[0])); + } + } else if (get_decimalint_type(prec) == DECIMAL_INT_64) { + ObArrayFixedSize *arr = static_cast *>(arr_obj); + if (is_null) { + if (OB_FAIL(arr->push_back(0, true))) { + LOG_WARN("failed to push back null value", K(ret)); + } + } else if (OB_FAIL(arr->push_back(dec_val->int64_v_[0]))) { + LOG_WARN("failed to push back decimal int64 value", K(ret), K(dec_val->int64_v_[0])); + } + } else if (get_decimalint_type(prec) == DECIMAL_INT_128) { + ObArrayFixedSize *arr = static_cast *>(arr_obj); + if (is_null) { + if (OB_FAIL(arr->push_back(0, true))) { + LOG_WARN("failed to push back null value", K(ret)); + } + } else if (OB_FAIL(arr->push_back(dec_val->int128_v_[0]))) { + LOG_WARN("failed to push back decimal int128 value", K(ret), K(dec_val->int128_v_[0])); + } + } else if (get_decimalint_type(prec) == DECIMAL_INT_256) { + ObArrayFixedSize *arr = static_cast *>(arr_obj); + if (is_null) { + if (OB_FAIL(arr->push_back(0, true))) { + LOG_WARN("failed to push back null value", K(ret)); + } + } else if (OB_FAIL(arr->push_back(dec_val->int256_v_[0]))) { + LOG_WARN("failed to push back decimal int256 value", K(ret), K(dec_val->int256_v_[0])); + } + } else if (get_decimalint_type(prec) == DECIMAL_INT_512) { + ObArrayFixedSize *arr = static_cast *>(arr_obj); + if (is_null) { + if (OB_FAIL(arr->push_back(0, true))) { + LOG_WARN("failed to push back null value", K(ret)); + } + } else if (OB_FAIL(arr->push_back(dec_val->int512_v_[0]))) { + LOG_WARN("failed to push back decimal int512 value", K(ret), 
K(dec_val->int512_v_[0])); + } + } else { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected precision", K(ret), K(prec)); + } + return ret; +} + +// convert collection bin to string (for liboblog) +int ObArrayUtil::convert_collection_bin_to_string(const ObString &collection_bin, + const common::ObIArray &extended_type_info, + common::ObIAllocator &allocator, + ObString &res_str) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(extended_type_info.count() != 1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid extended type info for collection type", K(ret), K(extended_type_info.count())); + } else { + ObSqlCollectionInfo type_info_parse(allocator); + ObString collection_type_name = extended_type_info.at(0); + type_info_parse.set_name(collection_type_name); + if (OB_FAIL(type_info_parse.parse_type_info())) { + LOG_WARN("fail to parse type info", K(ret), K(collection_type_name)); + } else if (OB_ISNULL(type_info_parse.collection_meta_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("collection meta is null", K(ret), K(collection_type_name)); + } else { + ObCollectionArrayType *arr_type = nullptr; + ObIArrayType *arr_obj = nullptr; + ObStringBuffer buf(&allocator); + if (OB_ISNULL(arr_type = static_cast(type_info_parse.collection_meta_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("collection meta is null", K(ret), K(collection_type_name)); + } else if (OB_FAIL(ObArrayTypeObjFactory::construct(allocator, *arr_type, arr_obj, true))) { + LOG_WARN("construct array obj failed", K(ret), K(type_info_parse)); + } else if (OB_ISNULL(arr_obj)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("arr_obj is null", K(ret), K(collection_type_name)); + } else { + ObString raw_binary = collection_bin; + if (OB_FAIL(arr_obj->init(raw_binary))) { + LOG_WARN("failed to init array", K(ret)); + } else if (OB_FAIL(arr_obj->print(arr_type->element_type_, buf))) { + LOG_WARN("failed to format array", K(ret)); + } else { + res_str.assign_ptr(buf.ptr(), buf.length()); + } + } + } + } + return ret; +} + +// 
determine a collection type is vector or array +int ObArrayUtil::get_mysql_type(const common::ObIArray &extended_type_info, + obmysql::EMySQLFieldType &type) +{ + int ret = OB_SUCCESS; + type = obmysql::MYSQL_TYPE_NOT_DEFINED; + ObArenaAllocator tmp_allocator("OB_ARRAY_UTIL"); + if (OB_UNLIKELY(extended_type_info.count() != 1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid extended type info for collection type", K(ret), K(extended_type_info.count())); + } else { + ObSqlCollectionInfo type_info_parse(tmp_allocator); + ObString collection_type_name = extended_type_info.at(0); + type_info_parse.set_name(collection_type_name); + if (OB_FAIL(type_info_parse.parse_type_info())) { + LOG_WARN("fail to parse type info", K(ret), K(collection_type_name)); + } else if (OB_ISNULL(type_info_parse.collection_meta_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("collection meta is null", K(ret), K(collection_type_name)); + } else { + uint16_t detail_type = type_info_parse.collection_meta_->type_id_; + if (detail_type == OB_ARRAY_TYPE) { + type = obmysql::MYSQL_TYPE_OB_ARRAY; + } else if (detail_type == OB_VECTOR_TYPE) { + type = obmysql::MYSQL_TYPE_OB_VECTOR; + } else { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected collection type", K(ret), K(detail_type)); + } + } + } + tmp_allocator.reset(); + return ret; +} + +int ObVectorData::push_back(float value) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else if (length_ + 1 > MAX_ARRAY_ELEMENT_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "array element size exceed max", K(ret), K(length_), K(MAX_ARRAY_ELEMENT_SIZE)); + } else if (OB_FAIL(data_container_->raw_data_.push_back(value))) { + OB_LOG(WARN, "failed to push value to array data", K(ret)); + } else if (get_raw_binary_len() > MAX_ARRAY_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "vector data length exceed max", K(ret), K(get_raw_binary_len()), 
K(MAX_ARRAY_SIZE)); + } else { + length_++; + } + return ret; +} + +int ObVectorData::print(const ObCollectionTypeBase *elem_type, ObStringBuffer &format_str, uint32_t begin, uint32_t print_size) const +{ + int ret = OB_SUCCESS; + UNUSED(elem_type); + if (OB_FAIL(format_str.append("["))) { + OB_LOG(WARN, "fail to append [", K(ret)); + } else { + if (print_size == 0) { + // print whole array + print_size = length_; + } + for (int i = begin; i < begin + print_size && OB_SUCC(ret); i++) { + if (i > begin && OB_FAIL(format_str.append(","))) { + OB_LOG(WARN, "fail to append \",\" to buffer", K(ret)); + } else { + int buf_size = FLOAT_TO_STRING_CONVERSION_BUFFER_SIZE; + if (OB_FAIL(format_str.reserve(buf_size + 1))) { + OB_LOG(WARN, "fail to reserve memory for format_str", K(ret)); + } else { + char *start = format_str.ptr() + format_str.length(); + uint64_t len = ob_gcvt(data_[i], ob_gcvt_arg_type::OB_GCVT_ARG_FLOAT, buf_size, start, NULL); + if (OB_FAIL(format_str.set_length(format_str.length() + len))) { + OB_LOG(WARN, "fail to set format_str len", K(ret), K(format_str.length()), K(len)); + } + } + } + } + } + if (OB_SUCC(ret) && OB_FAIL(format_str.append("]"))) { + OB_LOG(WARN, "fail to append ]", K(ret)); + } + return ret; +} + +int ObVectorData::get_raw_binary(char *res_buf, int64_t buf_len) +{ + int ret = OB_SUCCESS; + if (get_raw_binary_len() > buf_len) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "buf len isn't enough", K(ret), K(buf_len)); + } else if (data_container_ == NULL) { + MEMCPY(res_buf, reinterpret_cast(data_), sizeof(float) * length_); + } else { + MEMCPY(res_buf, + reinterpret_cast(data_container_->raw_data_.get_data()), + sizeof(float) * data_container_->raw_data_.size()); + } + return ret; +} + +int ObVectorData::init() +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else { + length_ = data_container_->raw_data_.size(); + data_ = 
data_container_->raw_data_.get_data(); + } + return ret; +} + +int ObVectorData::init(ObString &raw_data) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + char *raw_str = raw_data.ptr(); + if (raw_data.length() % sizeof(float) != 0) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "raw data len is invalid", K(ret), K(raw_data.length())); + } else { + length_ = raw_data.length() / sizeof(float); + data_ = reinterpret_cast(raw_str); + } + return ret; +} + +int ObVectorData::init(ObDatum *attrs, uint32_t attr_count, bool with_length) +{ + int ret = OB_SUCCESS; + // attrs of vector are same as array now, maybe optimize later + const uint32_t count = with_length ? 3 : 2; + if (attr_count != count) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected attrs", K(ret), K(attr_count), K(count)); + } else { + data_ = const_cast(reinterpret_cast(attrs[count - 1].get_string().ptr())); + length_ = attrs[count - 1].get_int_bytes() / sizeof(float); + } + return ret; +} + +int ObVectorData::check_validity(const ObCollectionArrayType &arr_type, const ObIArrayType &array) const +{ + int ret = OB_SUCCESS; + if (arr_type.dim_cnt_ != array.size()) { + ret = OB_ERR_INVALID_VECTOR_DIM; + LOG_WARN("invalid vector dimension", K(ret), K(arr_type.dim_cnt_), K(array.size())); + } + return ret; +} + +int ObVectorData::insert_from(const ObIArrayType &src, uint32_t begin, uint32_t len) +{ +int ret = OB_SUCCESS; + if (src.get_format() != get_format() + || src.get_element_type() != element_type_) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "inconsistent array type", K(ret), K(src.get_format()), K(src.get_element_type()), + K(get_format()), K(element_type_)); + } else if (OB_ISNULL(data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else { + const uint32_t src_data_offset = begin * sizeof(float); + int64_t curr_pos = data_container_->raw_data_.size(); + int64_t capacity = curr_pos + len; + data_container_->raw_data_.prepare_allocate(capacity); + 
char *cur_data = reinterpret_cast(data_container_->raw_data_.get_data() + curr_pos); + MEMCPY(cur_data, src.get_data() + src_data_offset, len * sizeof(float)); + length_ += len; + } + return ret; +} + +void ObVectorData::clear() +{ + data_ = nullptr; + length_ = 0; + if (OB_NOT_NULL(data_container_)) { + data_container_->clear(); + } +} + +int ObVectorData::flatten(ObArrayAttr *attrs, uint32_t attr_count, uint32_t &attr_idx) +{ + int ret = OB_SUCCESS; + const uint32_t len = 2; + if (len + attr_idx >= attr_count) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected attr count", K(ret), K(attr_count), K(attr_idx), K(len)); + } else { + attrs[attr_idx].ptr_ = nullptr; + attrs[attr_idx].length_ = 0; + attr_idx++; // skip null + attrs[attr_idx].ptr_ = reinterpret_cast(data_); + attrs[attr_idx].length_ = sizeof(float) * length_; + attr_idx++; + } + return ret; +} + +int ObVectorData::compare_at(uint32_t left_begin, uint32_t left_len, uint32_t right_begin, uint32_t right_len, + const ObIArrayType &right, int &cmp_ret) +{ + int ret = OB_SUCCESS; + const ObVectorData *right_data = dynamic_cast(&right); + if (OB_ISNULL(right_data)) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + OB_LOG(WARN, "invalid array type", K(ret), K(right.get_format()), K(this->get_format())); + } else { + uint32_t cmp_len = std::min(left_len, right_len); + cmp_ret = 0; + for (uint32_t i = 0; i < cmp_len && !cmp_ret; ++i) { + if (data_[left_begin + i] != (*right_data)[right_begin + i]) { + cmp_ret = data_[left_begin + i] > (*right_data)[right_begin + i] ? 1 : -1; + } + } + if (cmp_ret == 0 && left_len != right_len) { + cmp_ret = left_len > right_len ? 
1 : -1; + } + } + return ret; +} + +int ObVectorData::compare(const ObIArrayType &right, int &cmp_ret) +{ + return compare_at(0, this->length_, 0, right.size(), right, cmp_ret); +} + +int ObArrayBinary::push_back(const ObString &value, bool is_null) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else if (length_ + 1 > MAX_ARRAY_ELEMENT_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "array element size exceed max", K(ret), K(length_), K(MAX_ARRAY_ELEMENT_SIZE)); + } else { + uint32_t last_offset = data_container_->raw_data_.size(); + if (is_null) { + // push back null + if (OB_FAIL(push_null())) { + OB_LOG(WARN, "failed to push null", K(ret)); + } + } else if (OB_FAIL(data_container_->offsets_.push_back(last_offset + value.length()))) { + OB_LOG(WARN, "failed to push value to array data", K(ret)); + } else if (OB_FAIL(data_container_->null_bitmaps_.push_back(0))) { + OB_LOG(WARN, "failed to push null", K(ret)); + } else { + for (uint32_t i = 0; i < value.length() && OB_SUCC(ret); ++i) { + if (OB_FAIL(data_container_->raw_data_.push_back(value[i]))) { + OB_LOG(WARN, "failed to push value to array data", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (get_raw_binary_len() > MAX_ARRAY_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "array data length exceed max", K(ret), K(get_raw_binary_len()), K(MAX_ARRAY_SIZE)); + } else { + length_++; + } + } + } + return ret; +} + +int ObArrayBinary::insert_from(const ObIArrayType &src, uint32_t begin, uint32_t len) +{ + int ret = OB_SUCCESS; + if (src.get_format() != get_format() + || src.get_element_type() != element_type_) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "inconsistent array type", K(ret), K(src.get_format()), K(src.get_element_type()), + K(get_format()), K(element_type_)); + } else if (OB_ISNULL(data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else 
{ + // insert data + const uint32_t src_offset = offset_at(begin, src.get_offsets()); + uint32_t src_len = src.get_offsets()[begin + len - 1] - src_offset; + int64_t curr_pos = data_container_->raw_data_.size(); + int64_t capacity = curr_pos + src_len; + data_container_->raw_data_.prepare_allocate(capacity); + char *cur_data = data_container_->raw_data_.get_data() + curr_pos; + MEMCPY(cur_data, src.get_data() + src_offset, src_len); + // insert offsets + uint32_t last_offset = src_offset; + uint32_t pre_max_offset = data_container_->offset_at(length_); + for (uint32_t i = 0; i < len && OB_SUCC(ret); ++i) { + if (OB_FAIL(data_container_->offsets_.push_back(pre_max_offset + src.get_offsets()[begin + i] - last_offset))) { + OB_LOG(WARN, "failed to push value to array data", K(ret)); + } else { + last_offset = src.get_offsets()[begin + i]; + pre_max_offset = data_container_->offset_at(data_container_->offsets_.size()); + } + } + // insert nullbitmaps + for (uint32_t i = 0; i < len && OB_SUCC(ret); ++i) { + if (OB_FAIL(data_container_->null_bitmaps_.push_back(src.get_nullbitmap()[begin + i]))) { + OB_LOG(WARN, "failed to push null", K(ret)); + } + } + if (OB_SUCC(ret)) { + length_ += len; + } + } + return ret; +} + +ObString ObArrayBinary::operator[](const int64_t i) const +{ + ObString str; + uint32_t last_offset = offset_at(i, offsets_); + if (i >= 0 && i < length_) { + uint32_t offset = offsets_[i]; + str.assign_ptr(&data_[last_offset], offset - last_offset); + } + return str; +} + +int ObArrayBinary::get_data_binary(char *res_buf, int64_t buf_len) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + if (get_data_binary_len() > buf_len) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "buf len isn't enough", K(ret), K(buf_len)); + } else if (data_container_ == NULL) { + uint32_t last_idx = length_ > 0 ? 
length_ - 1 : 0; + MEMCPY(res_buf + pos, reinterpret_cast(null_bitmaps_), sizeof(uint8_t) * length_); + pos += sizeof(uint8_t) * length_; + MEMCPY(res_buf + pos, reinterpret_cast(offsets_), sizeof(uint32_t) * length_); + pos += sizeof(uint32_t) * length_; + MEMCPY(res_buf + pos, data_, offsets_[last_idx]); + } else { + MEMCPY(res_buf + pos, reinterpret_cast(data_container_->null_bitmaps_.get_data()), sizeof(uint8_t) * data_container_->null_bitmaps_.size()); + pos += sizeof(uint8_t) * data_container_->null_bitmaps_.size(); + MEMCPY(res_buf + pos, reinterpret_cast(data_container_->offsets_.get_data()), sizeof(uint32_t) * data_container_->offsets_.size()); + pos += sizeof(uint32_t) * data_container_->offsets_.size(); + MEMCPY(res_buf + pos, reinterpret_cast(data_container_->raw_data_.get_data()), data_container_->raw_data_.size()); + } + return ret; +} + +int ObArrayBinary::get_raw_binary(char *res_buf, int64_t buf_len) +{ + int ret = OB_SUCCESS; + if (get_raw_binary_len() > buf_len) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "buf len isn't enough", K(ret), K(buf_len)); + } else { + int64_t pos = 0; + MEMCPY(res_buf + pos, &length_, sizeof(length_)); + pos += sizeof(length_); + if (OB_FAIL(get_data_binary(res_buf + pos, buf_len - pos))) { + OB_LOG(WARN, "get data binary failed", K(ret), K(buf_len)); + } + } + return ret; +} + +int ObArrayBinary::init() +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else { + length_ = data_container_->offsets_.size(); + offsets_ = data_container_->offsets_.get_data(); + null_bitmaps_ = data_container_->null_bitmaps_.get_data(); + data_ = data_container_->raw_data_.get_data(); + } + return ret; +} + +int ObArrayBinary::init(ObString &raw_data) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + char *raw_str = raw_data.ptr(); + if (raw_data.length() < sizeof(length_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "raw data len is invalid", 
K(ret), K(raw_data.length())); + } else { + length_ = *reinterpret_cast(raw_str); + if (length_ > 0) { + pos += sizeof(length_); + // init null bitmap + null_bitmaps_ = reinterpret_cast(raw_str + pos); + if (pos + sizeof(uint8_t) * length_ > raw_data.length()) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "raw data len is invalid", K(ret), K(pos), K(length_), K(raw_data.length())); + } else { + pos += sizeof(uint8_t) * length_; + if (pos + sizeof(uint32_t) * length_ > raw_data.length()) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "raw data len is invalid", K(ret), K(pos), K(length_), K(raw_data.length())); + } else { + // init offset + offsets_ = reinterpret_cast(raw_str + pos); + pos += sizeof(uint32_t) * length_; + // init data + data_ = reinterpret_cast(raw_str + pos); + // last offset should be equal to data_ length + if (offsets_[length_ - 1] != raw_data.length() - pos) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "raw data len is invalid", K(ret), K(pos), K(length_), K(raw_data.length())); + } + } + } + } + } + return ret; +} + +int ObArrayBinary::init(ObDatum *attrs, uint32_t attr_count, bool with_length) +{ + int ret = OB_SUCCESS; + const uint32_t count = with_length ? 
4 : 3; + if (attr_count != count) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected attrs", K(ret), K(attr_count), K(count)); + } else { + uint32_t idx = 0; + if (with_length) { + length_ = attrs[idx++].get_uint32(); + } else { + length_ = attrs[0].get_int_bytes() / sizeof(uint8_t); + } + null_bitmaps_ = const_cast(reinterpret_cast(attrs[idx++].get_string().ptr())); + offsets_ = const_cast(reinterpret_cast(attrs[idx++].get_string().ptr())); + data_ = const_cast(reinterpret_cast(attrs[idx++].get_string().ptr())); + if ((with_length && (length_ != attrs[1].get_int_bytes() / sizeof(uint8_t) || length_ != attrs[2].get_int_bytes() / sizeof(uint32_t))) + || (!with_length && (length_ != attrs[1].get_int_bytes() / sizeof(uint32_t)))) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected attrs", K(ret), K(with_length), K(length_)); + } + } + return ret; +} + +int ObArrayBinary::push_null() +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else if (length_ + 1 > MAX_ARRAY_ELEMENT_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "array element size exceed max", K(ret), K(length_), K(MAX_ARRAY_ELEMENT_SIZE)); + } else { + uint32_t last_offset = data_container_->raw_data_.size(); + if (OB_FAIL(data_container_->null_bitmaps_.push_back(1))) { + // push back null + OB_LOG(WARN, "failed to push null", K(ret)); + } else if (OB_FAIL(data_container_->offsets_.push_back(last_offset))) { + OB_LOG(WARN, "failed to push value to array data", K(ret)); + } else if (get_raw_binary_len() > MAX_ARRAY_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "array data length exceed max", K(ret), K(get_raw_binary_len()), K(MAX_ARRAY_SIZE)); + } else { + length_++; + } + } + return ret; +} + +int ObArrayBinary::print(const ObCollectionTypeBase *elem_type, ObStringBuffer &format_str, uint32_t begin, uint32_t print_size) const +{ + int ret = OB_SUCCESS; + UNUSED(elem_type); + if 
(OB_FAIL(format_str.append("["))) { + OB_LOG(WARN, "fail to append [", K(ret)); + } else { + if (print_size == 0) { + // print whole array + print_size = length_; + } + for (int i = begin; i < begin + print_size && OB_SUCC(ret); i++) { + if (i > begin && OB_FAIL(format_str.append(","))) { + OB_LOG(WARN, "fail to append \",\" to buffer", K(ret)); + } else if (null_bitmaps_[i]) { + // value is null + if (OB_FAIL(format_str.append("NULL"))) { + OB_LOG(WARN, "fail to append NULL to buffer", K(ret)); + } + } else if (OB_FAIL(format_str.append("\""))) { + OB_LOG(WARN, "fail to append \"\"\" to buffer", K(ret)); + } else if (OB_FAIL(format_str.append((*this)[i]))) { + OB_LOG(WARN, "fail to append string to format_str", K(ret)); + } else if (OB_FAIL(format_str.append("\""))) { + OB_LOG(WARN, "fail to append \"\"\" to buffer", K(ret)); + } + } + } + if (OB_SUCC(ret) && OB_FAIL(format_str.append("]"))) { + OB_LOG(WARN, "fail to append ]", K(ret)); + } + return ret; +} + +void ObArrayBinary::clear() +{ + data_ = nullptr; + null_bitmaps_ = nullptr; + offsets_ = nullptr; + length_ = 0; + if (OB_NOT_NULL(data_container_)) { + data_container_->clear(); + } +} + +int ObArrayBinary::flatten(ObArrayAttr *attrs, uint32_t attr_count, uint32_t &attr_idx) +{ + int ret = OB_SUCCESS; + const uint32_t len = 3; + if (len >= attr_count) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected attr count", K(ret), K(attr_count), K(attr_idx), K(len)); + } else { + attrs[attr_idx].ptr_ = reinterpret_cast(null_bitmaps_); + attrs[attr_idx].length_ = sizeof(uint8_t) * length_; + attr_idx++; + attrs[attr_idx].ptr_ = reinterpret_cast(offsets_); + attrs[attr_idx].length_ = sizeof(uint32_t) * length_; + attr_idx++; + attrs[attr_idx].ptr_ = data_; + attrs[attr_idx].length_ = offsets_[length_ - 1]; + attr_idx++; + } + return ret; +} + +int ObArrayBinary::compare_at(uint32_t left_begin, uint32_t left_len, + uint32_t right_begin, uint32_t right_len, + const ObIArrayType &right, int &cmp_ret) +{ + int ret = 
OB_SUCCESS; + uint32_t cmp_len = std::min(left_len, right_len); + cmp_ret = 0; + for (uint32_t i = 0; i < cmp_len && !cmp_ret && OB_SUCC(ret); ++i) { + if (this->is_null(left_begin + i) && !right.is_null(right_begin + i)) { + cmp_ret = 1; + } else if (!this->is_null(left_begin + i) && right.is_null(right_begin + i)) { + cmp_ret = -1; + } else if (this->is_null(left_begin + i) && right.is_null(right_begin + i)) { + } else { + const ObArrayBinary *right_data = dynamic_cast(&right); + uint32_t l_start = offset_at(left_begin + i, get_offsets()); + uint32_t l_child_len = get_offsets()[left_begin + i] - l_start; + uint32_t r_start = right_data->offset_at(right_begin + i, right_data->get_offsets()); + uint32_t r_child_len = right_data->get_offsets()[right_begin + i] - r_start; + if (OB_ISNULL(right_data)) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + OB_LOG(WARN, "invalid array type", K(ret), K(right.get_format()), K(this->get_format())); + } else { + uint32_t data_len = std::min(l_child_len, r_child_len); + cmp_ret = MEMCMP(data_ + l_start, right_data->get_data() + r_start, data_len); + if (!cmp_ret && l_child_len != r_child_len) { + cmp_ret = l_child_len > r_child_len ? 1 : -1; + } + } + } + } + if (!cmp_ret && OB_SUCC(ret) && left_len != right_len) { + cmp_ret = left_len > right_len ? 
1 : -1; + } + return ret; +} + +int ObArrayBinary::compare(const ObIArrayType &right, int &cmp_ret) +{ + return compare_at(0, length_, 0, right.size(), right, cmp_ret); +} + +int ObArrayNested::get_data_binary(char *res_buf, int64_t buf_len) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + if (get_data_binary_len() > buf_len) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "buf len isn't enough", K(ret), K(buf_len)); + } else if (data_container_ == NULL) { + MEMCPY(res_buf + pos, reinterpret_cast(null_bitmaps_), sizeof(uint8_t) * length_); + pos += sizeof(uint8_t) * length_; + MEMCPY(res_buf + pos, reinterpret_cast(offsets_), sizeof(uint32_t) * length_); + pos += sizeof(uint32_t) * length_; + } else { + MEMCPY(res_buf + pos, reinterpret_cast(data_container_->null_bitmaps_.get_data()), sizeof(uint8_t) * data_container_->null_bitmaps_.size()); + pos += sizeof(uint8_t) * data_container_->null_bitmaps_.size(); + MEMCPY(res_buf + pos, reinterpret_cast(data_container_->offsets_.get_data()), sizeof(uint32_t) * data_container_->offsets_.size()); + pos += sizeof(uint32_t) * data_container_->offsets_.size(); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(data_->get_data_binary(res_buf + pos, buf_len - pos))) { + OB_LOG(WARN, "get data binary failed", K(ret), K(pos), K(length_), K(buf_len)); + } + return ret; +} + +int ObArrayNested::get_raw_binary(char *res_buf, int64_t buf_len) +{ + int ret = OB_SUCCESS; + if (get_raw_binary_len() > buf_len) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "buf len isn't enough", K(ret), K(buf_len)); + } else { + int64_t pos = 0; + MEMCPY(res_buf + pos, &length_, sizeof(length_)); + pos += sizeof(length_); + if (OB_FAIL(get_data_binary(res_buf + pos, buf_len - pos))) { + OB_LOG(WARN, "get data binary failed", K(ret), K(buf_len)); + } + } + return ret; +} + +int ObArrayNested::insert_from(const ObIArrayType &src, uint32_t begin, uint32_t len) +{ + int ret = OB_SUCCESS; + if (src.get_format() != get_format() + || src.get_element_type() != 
element_type_) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "inconsistent array type", K(ret), K(src.get_format()), K(src.get_element_type()), + K(get_format()), K(element_type_)); + } else if (OB_ISNULL(data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else { + // insert offsets + uint32_t last_offset = offset_at(begin, src.get_offsets()); + uint32_t pre_max_offset = data_container_->offset_at(length_); + for (uint32_t i = 0; i < len && OB_SUCC(ret); ++i) { + if (OB_FAIL(data_container_->offsets_.push_back(pre_max_offset + src.get_offsets()[begin + i] - last_offset))) { + OB_LOG(WARN, "failed to push value to array data", K(ret)); + } else { + last_offset = src.get_offsets()[begin + i]; + pre_max_offset = data_container_->offset_at(data_container_->offsets_.size()); + } + } + // insert nullbitmaps + for (uint32_t i = 0; i < len && OB_SUCC(ret); ++i) { + if (OB_FAIL(data_container_->null_bitmaps_.push_back(src.get_nullbitmap()[begin + i]))) { + OB_LOG(WARN, "failed to push null", K(ret)); + } + } + // insert data + if (OB_SUCC(ret)) { + uint32_t start = offset_at(begin, src.get_offsets()); + uint32_t child_len = src.get_offsets()[begin + len - 1] - start; + const ObIArrayType *child_arr = static_cast(src).get_child_array(); + if (OB_FAIL(data_->insert_from(*child_arr, start, child_len))) { + OB_LOG(WARN, "failed to insert child array", K(ret)); + } else { + length_ += len; + } + } + } + return ret; +} + +int ObArrayNested::init() +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else { + length_ = data_container_->offsets_.size(); + offsets_ = data_container_->offsets_.get_data(); + null_bitmaps_ = data_container_->null_bitmaps_.get_data(); + if (data_ != NULL) { + data_->init(); + } + } + return ret; +} + +int ObArrayNested::init(ObString &raw_data) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + char 
*raw_str = raw_data.ptr(); + if (raw_data.length() < sizeof(length_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "raw data len is invalid", K(ret), K(raw_data.length())); + } else { + length_ = *reinterpret_cast(raw_str); + if (length_ > 0) { + pos += sizeof(length_); + // init null bitmap + null_bitmaps_ = reinterpret_cast(raw_str + pos); + if (pos + sizeof(uint8_t) * length_ > raw_data.length()) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "raw data len is invalid", K(ret), K(pos), K(length_), K(raw_data.length())); + } else { + pos += sizeof(uint8_t) * length_; + if (pos + sizeof(uint32_t) * length_ > raw_data.length()) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "raw data len is invalid", K(ret), K(pos), K(length_), K(raw_data.length())); + } else { + // init offset + offsets_ = reinterpret_cast(raw_str + pos); + // caution : length_ - 1 means : last offset is length of data_ + pos += sizeof(uint32_t) * (length_ - 1); + // init data + ObString data_str(raw_data.length() - pos, raw_str + pos); + if (OB_FAIL(data_->init(data_str))) { + OB_LOG(WARN, "data init failed", K(ret), K(pos), K(length_), K(raw_data.length())); + } + } + } + } + } + return ret; +} + +int ObArrayNested::init(ObDatum *attrs, uint32_t attr_count, bool with_length) +{ + int ret = OB_SUCCESS; + const uint32_t count = with_length ? 
4 : 3; + if (attr_count < count) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected attrs", K(ret), K(attr_count), K(count)); + } else { + uint32_t idx = 0; + if (with_length) { + length_ = attrs[idx++].get_uint32(); + } else { + length_ = attrs[0].get_int_bytes() / sizeof(uint8_t); + } + null_bitmaps_ = const_cast(reinterpret_cast(attrs[idx++].get_string().ptr())); + offsets_ = const_cast(reinterpret_cast(attrs[idx++].get_string().ptr())); + if (OB_FAIL(data_->init(attrs + idx, attr_count - idx, false))) { + OB_LOG(WARN, "failed to init attrs", K(ret), K(attr_count), K(count)); + } + if ((with_length && (length_ != attrs[1].get_int_bytes() / sizeof(uint8_t) || length_ != attrs[2].get_int_bytes() / sizeof(uint32_t))) + || (!with_length && (length_ != attrs[1].get_int_bytes() / sizeof(uint32_t)))) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected attrs", K(ret), K(with_length), K(length_)); + } + } + return ret; +} + +int ObArrayNested::print(const ObCollectionTypeBase *elem_type, ObStringBuffer &format_str, uint32_t begin, uint32_t print_size) const +{ + int ret = OB_SUCCESS; + const ObCollectionArrayType *array_type = dynamic_cast(elem_type); + if (OB_ISNULL(array_type)) { + ret = OB_INVALID_ARGUMENT; + OB_LOG(WARN, "invalid argument", K(ret)); + } else if (OB_FAIL(format_str.append("["))) { + OB_LOG(WARN, "fail to append [", K(ret)); + } else { + if (print_size == 0) { + // print whole array + print_size = length_; + } + for (int i = begin; i < begin + print_size && OB_SUCC(ret); i++) { + if (i > begin && OB_FAIL(format_str.append(","))) { + OB_LOG(WARN, "fail to append \",\" to buffer", K(ret)); + } else if (null_bitmaps_[i]) { + // value is null + if (OB_FAIL(format_str.append("NULL"))) { + OB_LOG(WARN, "fail to append NULL to buffer", K(ret)); + } + } else { + uint32_t start = offset_at(i, offsets_); + uint32_t elem_cnt = offsets_[i] - start; + if (OB_FAIL(data_->print(array_type->element_type_, format_str, start, elem_cnt))) { + OB_LOG(WARN, "fail 
to append string to format_str", K(ret)); + } + } + } + } + if (OB_SUCC(ret) && OB_FAIL(format_str.append("]"))) { + OB_LOG(WARN, "fail to append ]", K(ret)); + } + return ret; +} + +int ObArrayNested::push_back(const ObIArrayType &src, bool is_null) +{ + int ret = OB_SUCCESS; + if (src.get_format() != data_->get_format() + || src.get_element_type() != data_->get_element_type()) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "inconsistent array type", K(ret), K(src.get_format()), K(src.get_element_type()), + K(data_->get_format()), K(data_->get_element_type())); + } else if (OB_ISNULL(data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else if (length_ + 1 > MAX_ARRAY_ELEMENT_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "array element size exceed max", K(ret), K(length_), K(MAX_ARRAY_ELEMENT_SIZE)); + } else if (is_null) { + if (OB_FAIL(push_null())) { + OB_LOG(WARN, "failed to push null", K(ret)); + } + } else { + uint32_t last_offset = data_container_->offset_at(length_); + uint32_t cur_offset = last_offset + src.size(); + if (OB_FAIL(data_container_->null_bitmaps_.push_back(false))) { + OB_LOG(WARN, "failed to push null", K(ret)); + } else if (OB_FAIL(data_container_->offsets_.push_back(cur_offset))) { + OB_LOG(WARN, "failed to push null", K(ret)); + } else if (OB_FAIL(data_->insert_from(src, 0, src.size()))) { + OB_LOG(WARN, "failed to insert child array", K(ret)); + } else if (get_raw_binary_len() > MAX_ARRAY_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "array data length exceed max", K(ret), K(get_raw_binary_len()), K(MAX_ARRAY_SIZE)); + } else { + length_++; + } + } + + return ret; +} + +int ObArrayNested::push_null() +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else if (length_ + 1 > MAX_ARRAY_ELEMENT_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "array element size exceed max", K(ret), 
K(length_), K(MAX_ARRAY_ELEMENT_SIZE)); + } else { + uint32_t last_offset = data_container_->offset_at(length_); + if (OB_FAIL(data_container_->null_bitmaps_.push_back(true))) { + OB_LOG(WARN, "failed to push null", K(ret)); + } else if (OB_FAIL(data_container_->offsets_.push_back(last_offset))) { + OB_LOG(WARN, "failed to push null", K(ret)); + } else if (get_raw_binary_len() > MAX_ARRAY_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "array data length exceed max", K(ret), K(get_raw_binary_len()), K(MAX_ARRAY_SIZE)); + } else { + length_++; + } + } + return ret; +} + +void ObArrayNested::clear() +{ + null_bitmaps_ = nullptr; + offsets_ = nullptr; + length_ = 0; + if (OB_NOT_NULL(data_)) { + data_->clear(); + } + if (OB_NOT_NULL(data_container_)) { + data_container_->clear(); + } +} + +int ObArrayNested::at(uint32_t idx, ObIArrayType &dest) +{ + int ret = OB_SUCCESS; + uint32_t start = offset_at(idx, get_offsets()); + uint32_t child_len = get_offsets()[idx] - start; + const ObIArrayType *child_arr = get_child_array(); + if (OB_FAIL(dest.insert_from(*child_arr, start, child_len))) { + OB_LOG(WARN, "failed to insert child array", K(ret), K(idx), K(start), K(child_len)); + } else if (OB_FAIL(dest.init())) { + OB_LOG(WARN, "failed to init array element", K(ret), K(idx), K(start), K(child_len)); + } + return ret; +} + +int ObArrayNested::flatten(ObArrayAttr *attrs, uint32_t attr_count, uint32_t &attr_idx) +{ + int ret = OB_SUCCESS; + const uint32_t len = 2; + if (len >= attr_count) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected attr count", K(ret), K(attr_count), K(attr_idx), K(len)); + } else { + attrs[attr_idx].ptr_ = reinterpret_cast(null_bitmaps_); + attrs[attr_idx].length_ = sizeof(uint8_t) * length_; + attr_idx++; + attrs[attr_idx].ptr_ = reinterpret_cast(offsets_); + attrs[attr_idx].length_ = sizeof(uint32_t) * length_; + attr_idx++; + if (OB_FAIL(data_->flatten(attrs, attr_count, attr_idx))) { + OB_LOG(WARN, "failed to flatten data", K(ret), 
K(attr_count), K(attr_idx)); + } + } + return ret; +} + +int ObArrayNested::compare_at(uint32_t left_begin, uint32_t left_len, + uint32_t right_begin, uint32_t right_len, + const ObIArrayType &right, int &cmp_ret) +{ + int ret = OB_SUCCESS; + uint32_t cmp_len = std::min(left_len, right_len); + cmp_ret = 0; + for (uint32_t i = 0; i < cmp_len && !cmp_ret && OB_SUCC(ret); ++i) { + if (this->is_null(left_begin + i) && !right.is_null(right_begin + i)) { + cmp_ret = 1; + } else if (!this->is_null(left_begin + i) && right.is_null(right_begin + i)) { + cmp_ret = -1; + } else if (this->is_null(left_begin + i) && right.is_null(right_begin + i)) { + } else { + const ObArrayNested *right_nested = dynamic_cast(&right); + uint32_t l_start = offset_at(left_begin + i, get_offsets()); + uint32_t l_child_len = get_offsets()[left_begin + i] - l_start; + uint32_t r_start = right_nested->offset_at(right_begin + i, right_nested->get_offsets()); + uint32_t r_child_len = right_nested->get_offsets()[right_begin + i] - r_start; + if (OB_ISNULL(right_nested)) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + OB_LOG(WARN, "invalid array type", K(ret), K(right.get_format()), K(this->get_format())); + } else if (OB_FAIL(get_child_array()->compare_at(l_start, l_child_len, r_start, r_child_len, *right_nested->get_child_array(), cmp_ret))) { + OB_LOG(WARN, "failed to do child array compare", K(ret), K(l_start), K(l_child_len), K(r_start), K(r_child_len)); + } + } + } + if (!cmp_ret && OB_SUCC(ret) && left_len != right_len) { + cmp_ret = (left_len > right_len ? 
1 : -1); + } + return ret; +} + +int ObArrayNested::compare(const ObIArrayType &right, int &cmp_ret) +{ + return compare_at(0, length_, 0, right.size(), right, cmp_ret); +} + +#undef CONSTRUCT_ARRAY_OBJ +#undef CONSTRUCT_FIXED_ARRAY_OBJ + +} // namespace common +} // namespace oceanbase \ No newline at end of file diff --git a/deps/oblib/src/lib/udt/ob_array_type.h b/deps/oblib/src/lib/udt/ob_array_type.h new file mode 100644 index 0000000000..f79a4d5d99 --- /dev/null +++ b/deps/oblib/src/lib/udt/ob_array_type.h @@ -0,0 +1,861 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_OB_ARRAY_TYPE_ +#define OCEANBASE_OB_ARRAY_TYPE_ +#include +#include +#include "lib/string/ob_string.h" +#include "lib/container/ob_vector.h" +#include "lib/container/ob_array_iterator.h" +#include "lib/udt/ob_collection_type.h" +#include "lib/string/ob_string_buffer.h" +#include "lib/wide_integer/ob_wide_integer_str_funcs.h" +#include "lib/oblog/ob_log_module.h" +#include "lib/utility/ob_fast_convert.h" +#include "rpc/obmysql/ob_mysql_global.h" // DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE +#include "src/share/datum/ob_datum.h" + + +namespace oceanbase { +namespace common { + +static constexpr int64_t MAX_ARRAY_SIZE = (1 << 20) * 16; // 16M +static constexpr int64_t MAX_ARRAY_ELEMENT_SIZE = 2000000; +enum ArrayAttr { + ATTR_LENGTH = 0, + ATTR_NULL_BITMAP = 1, + ATTR_OFFSETS = 2, + ATTR_DATA = 3, +}; + +struct ObArrayAttr { + const char *ptr_; + uint32_t length_; +}; + +template +class ObArrayData { +public : + ObArrayData(ObIAllocator &allocator) + : raw_data_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(allocator, "ARRAYModule")), + null_bitmaps_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(allocator, "ARRAYModule")), + offsets_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(allocator, "ARRAYModule")) {} + + using Container = common::ObArray; + using NullContainer = common::ObArray; + using OffsetContainer = common::ObArray; + inline size_t data_length() { return raw_data_.size() * sizeof(T); } + inline size_t nullbitmaps_length() { return null_bitmaps_.size() * sizeof(uint8_t); } + inline size_t offsets_length() { return offsets_.size() * sizeof(uint32_t); } + inline uint32_t offset_at(uint32_t idx) { return idx == 0 ? 
0 : offsets_[idx - 1]; } + void clear() { raw_data_.reset(); null_bitmaps_.reset(); offsets_.reset(); } + + Container raw_data_; + NullContainer null_bitmaps_; + OffsetContainer offsets_; +}; + +enum ArrayFormat { + Fixed_Size = 0, + Vector = 1, + Binary_Varlen = 2, + Nested_Array = 3, + Array_MAX_FORMAT +}; + +class ObIArrayType { +public: + virtual int print(const ObCollectionTypeBase *elem_type, ObStringBuffer &format_str, + uint32_t begin = 0, uint32_t print_size = 0) const = 0; + virtual int32_t get_raw_binary_len() = 0; + virtual int get_raw_binary(char *res_buf, int64_t buf_len) = 0; + // without length_ + virtual int32_t get_data_binary_len() = 0; + virtual int get_data_binary(char *res_buf, int64_t buf_len) = 0; + virtual int init(ObString &raw_data) = 0; + virtual int init(ObDatum *attrs, uint32_t attr_count, bool with_length = true) = 0; + virtual int init() = 0; // init array with self data_container + virtual void set_scale(ObScale scale) = 0; // only for decimalint array + virtual ArrayFormat get_format() const = 0; + virtual uint32_t size() const = 0; + virtual int check_validity(const ObCollectionArrayType &arr_type, const ObIArrayType &array) const = 0; + virtual bool is_null(uint32_t idx) const = 0; // check if the idx-th element is null or not, idx validity is guaranteed by caller + virtual int push_null() = 0; + virtual bool contain_null() const = 0; + virtual int insert_from(const ObIArrayType &src, uint32_t begin, uint32_t len) = 0; + virtual int32_t get_element_type() const = 0; + virtual char *get_data() const = 0; + virtual uint32_t *get_offsets() const = 0; + virtual uint8_t *get_nullbitmap() const = 0; + virtual void set_element_type(int32_t type) = 0; + virtual int at(uint32_t idx, ObIArrayType &dest) = 0; + virtual void clear() = 0; + virtual int flatten(ObArrayAttr *attrs, uint32_t attr_count, uint32_t &attr_idx) = 0; + virtual int set_null_bitmaps(uint8_t *nulls, int64_t length) = 0; + virtual int set_offsets(uint32_t *offsets, 
int64_t length) = 0; + virtual int compare(const ObIArrayType &right, int &cmp_ret) = 0; + virtual int compare_at(uint32_t left_begin, uint32_t left_len, + uint32_t right_begin, uint32_t right_len, + const ObIArrayType &right, int &cmp_ret) = 0; +}; + +template +class ObArrayBase : public ObIArrayType { +public : + ObArrayBase() : length_(0), element_type_(0), null_bitmaps_(nullptr), data_container_(nullptr) {} + ObArrayBase(uint32_t length, int32_t elem_type, uint8_t *null_bitmaps) + : length_(length), element_type_(elem_type), null_bitmaps_(null_bitmaps), data_container_(nullptr) {} + + uint32_t size() const { return length_; } + bool contain_null() const + { + bool bret = false; + for (int64_t i = 0; null_bitmaps_ != nullptr && !bret && i < length_; ++i) { + if (null_bitmaps_[i] > 0) { + bret = true; + } + } + return bret; + } + int32_t get_element_type() const { return element_type_; } + void set_element_type(int32_t type) { element_type_ = type;} + uint8_t *get_nullbitmap() const { return null_bitmaps_;} + // make sure null_bitmaps_ isn't nullptr and idx is less than length_ + bool is_null(uint32_t idx) const { return null_bitmaps_[idx] > 0; } + // make sure offsets isn't nullptr and idx is less than length + uint32_t offset_at(uint32_t idx, uint32_t *offsets) const { return idx == 0 ? 
0 : offsets[idx - 1]; } + inline void set_array_data(ObArrayData *arr_data) { data_container_ = arr_data;}
+  // Appends `length` null flags (one byte per element, non-zero means NULL) to
+  // the owned data container. Returns OB_ERR_UNEXPECTED for a read-only array
+  // (no data container attached).
+  // NOTE(review): assumes prepare_allocate() returns an error code like
+  // push_back() does on these containers - confirm.
+  int set_null_bitmaps(uint8_t *nulls, int64_t length)
+  {
+    int ret = OB_SUCCESS;
+    if (OB_ISNULL(data_container_)) {
+      ret = OB_ERR_UNEXPECTED;
+      OB_LOG(WARN, "try to modify read-only array", K(ret));
+    } else {
+      int64_t curr_pos = data_container_->null_bitmaps_.size();
+      int64_t capacity = curr_pos + length;
+      if (OB_FAIL(data_container_->null_bitmaps_.prepare_allocate(capacity))) {
+        OB_LOG(WARN, "failed to extend null bitmaps", K(ret), K(capacity));
+      } else {
+        uint8_t *cur_null_bitmap = data_container_->null_bitmaps_.get_data() + curr_pos;
+        MEMCPY(cur_null_bitmap, nulls, length * sizeof(uint8_t));
+      }
+    }
+    return ret;
+  }
+  // Appends `length` uint32_t offsets to the owned data container.
+  // Returns OB_ERR_UNEXPECTED for a read-only array.
+  int set_offsets(uint32_t *offsets, int64_t length)
+  {
+    int ret = OB_SUCCESS;
+    if (OB_ISNULL(data_container_)) {
+      ret = OB_ERR_UNEXPECTED;
+      OB_LOG(WARN, "try to modify read-only array", K(ret));
+    } else {
+      int64_t curr_pos = data_container_->offsets_.size();
+      int64_t capacity = curr_pos + length;
+      if (OB_FAIL(data_container_->offsets_.prepare_allocate(capacity))) {
+        OB_LOG(WARN, "failed to extend offsets", K(ret), K(capacity));
+      } else {
+        // curr_pos counts uint32_t entries, so the write position is computed in
+        // elements. The previous code added curr_pos * sizeof(uint32_t) to the
+        // element pointer, scaling the offset twice and writing past the
+        // reserved area whenever the container was not empty.
+        uint32_t *cur_offsets = reinterpret_cast<uint32_t *>(data_container_->offsets_.get_data()) + curr_pos;
+        MEMCPY(cur_offsets, offsets, length * sizeof(uint32_t));
+      }
+    }
+    return ret;
+  }
+  // Reserves room for `length` more elements in the raw data container and
+  // returns, through `data`, a pointer to the first reserved element.
+  // `data` is left untouched on failure.
+  int get_reserved_data(int64_t length, T *&data)
+  {
+    int ret = OB_SUCCESS;
+    if (OB_ISNULL(data_container_)) {
+      ret = OB_ERR_UNEXPECTED;
+      OB_LOG(WARN, "try to modify read-only array", K(ret));
+    } else {
+      int64_t curr_pos = data_container_->raw_data_.size();
+      int64_t capacity = curr_pos + length;
+      if (OB_FAIL(data_container_->raw_data_.prepare_allocate(capacity))) {
+        OB_LOG(WARN, "failed to extend raw data", K(ret), K(capacity));
+      } else {
+        data = reinterpret_cast<T *>(data_container_->raw_data_.get_data() + curr_pos);
+      }
+    }
+    return ret;
+  }
+ +protected : + uint32_t length_; + int32_t element_type_; + uint8_t *null_bitmaps_; + ObArrayData *data_container_; +}; + +template +class ObArrayFixedSize : public ObArrayBase { +public : + ObArrayFixedSize() : ObArrayBase(), data_(nullptr), scale_(0) {} + ObArrayFixedSize(uint32_t length, int32_t elem_type, uint8_t *null_bitmaps, T
*data, uint32_t scale = 0) + : ObArrayBase(length, elem_type, null_bitmaps), + data_(data), scale_(scale) {} + inline void set_data(T *data, uint32_t len) { data_ = data; this->length_ = len;} + inline int16_t get_scale() { return scale_; } + void set_scale(ObScale scale) { scale_ = scale; } // only for decimalint array + T operator[](const int64_t i) const { return data_[i]; } + ObDecimalInt *get_decimal_int(const int64_t i) { return (ObDecimalInt *)(data_ + i); } + ArrayFormat get_format() const { return ArrayFormat::Fixed_Size; } + uint32_t *get_offsets() const { return nullptr; } + char *get_data() const { return reinterpret_cast(data_);} + int check_validity(const ObCollectionArrayType &arr_type, const ObIArrayType &array) const { return OB_SUCCESS; } + int push_null() + { + int ret = OB_SUCCESS; + if (OB_ISNULL(this->data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else if (this->length_ + 1 > MAX_ARRAY_ELEMENT_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "array element size exceed max", K(ret), K(this->length_), K(MAX_ARRAY_ELEMENT_SIZE)); + } else if (OB_FAIL(this->data_container_->raw_data_.push_back(0))) { + OB_LOG(WARN, "failed to push value to array data", K(ret)); + } else if (OB_FAIL(this->data_container_->null_bitmaps_.push_back(1))) { + // push back null + OB_LOG(WARN, "failed to push null", K(ret)); + } else if (get_raw_binary_len() > MAX_ARRAY_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "array data length exceed max", K(ret), K(get_raw_binary_len()), K(MAX_ARRAY_SIZE)); + } else { + this->length_++; + } + return ret; + } + int push_back(T value, bool is_null = false) + { + int ret = OB_SUCCESS; + if (OB_ISNULL(this->data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else if (this->length_ + 1 > MAX_ARRAY_ELEMENT_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "array element size exceed max", K(ret), K(this->length_), 
K(MAX_ARRAY_ELEMENT_SIZE)); + } else if (is_null) { + if (OB_FAIL(this->data_container_->raw_data_.push_back(0))) { + OB_LOG(WARN, "failed to push value to array data", K(ret)); + } else if (OB_FAIL(this->data_container_->null_bitmaps_.push_back(1))) { + // push back null + OB_LOG(WARN, "failed to push null", K(ret)); + } + } else if (OB_FAIL(this->data_container_->raw_data_.push_back(value))) { + OB_LOG(WARN, "failed to push value to array data", K(ret)); + } else if (OB_FAIL(this->data_container_->null_bitmaps_.push_back(0))) { + OB_LOG(WARN, "failed to push null", K(ret)); + } else if (get_raw_binary_len() > MAX_ARRAY_SIZE) { + ret = OB_SIZE_OVERFLOW; + OB_LOG(WARN, "array data length exceed max", K(ret), K(get_raw_binary_len()), K(MAX_ARRAY_SIZE)); + } + if (OB_SUCC(ret)) { + this->length_++; + } + return ret; + } + + int print(const ObCollectionTypeBase *elem_type, ObStringBuffer &format_str, + uint32_t begin = 0, uint32_t print_size = 0) const + { + int ret = OB_SUCCESS; + const ObCollectionBasicType *basic_type = dynamic_cast(elem_type); + if (OB_ISNULL(basic_type)) { + ret = OB_INVALID_ARGUMENT; + OB_LOG(WARN, "invalid argument", K(ret)); + } else if (OB_FAIL(format_str.append("["))) { + OB_LOG(WARN, "fail to append [", K(ret)); + } else { + if (print_size == 0) { + // print whole element + print_size = this->length_; + } + ObObjType obj_type = basic_type->basic_meta_.get_obj_type(); + for (int i = begin; i < begin + print_size && OB_SUCC(ret); i++) { + if (i > begin && OB_FAIL(format_str.append(","))) { + OB_LOG(WARN, "fail to append \",\" to buffer", K(ret)); + } else if (this->null_bitmaps_[i]) { + // value is null + if (OB_FAIL(format_str.append("NULL"))) { + OB_LOG(WARN, "fail to append NULL to buffer", K(ret)); + } + } else { + switch (obj_type) { + case ObTinyIntType: + case ObSmallIntType: + case ObIntType: + case ObInt32Type: { + char tmp_buf[ObFastFormatInt::MAX_DIGITS10_STR_SIZE] = {0}; + int64_t len = ObFastFormatInt::format_signed(data_[i], 
tmp_buf); + if (OB_FAIL(format_str.append(tmp_buf, len))) { + OB_LOG(WARN, "fail to append int to buffer", K(ret), K(data_[i]), K(print_size)); + } + break; + } + case ObUTinyIntType: + case ObUSmallIntType: + case ObUInt64Type: + case ObUInt32Type: { + char tmp_buf[ObFastFormatInt::MAX_DIGITS10_STR_SIZE] = {0}; + int64_t len = ObFastFormatInt::format_unsigned(data_[i], tmp_buf); + if (OB_FAIL(format_str.append(tmp_buf, len))) { + OB_LOG(WARN, "fail to append int to buffer", K(ret), K(data_[i]), K(print_size)); + } + break; + } + case ObFloatType : + case ObDoubleType : { + int buf_size = obj_type == ObFloatType ? FLOAT_TO_STRING_CONVERSION_BUFFER_SIZE : DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE; + if (OB_FAIL(format_str.reserve(buf_size + 1))) { + OB_LOG(WARN, "fail to reserve memory for format_str", K(ret)); + } else { + char *start = format_str.ptr() + format_str.length(); + uint64_t len = ob_gcvt(data_[i], + obj_type == ObFloatType ? ob_gcvt_arg_type::OB_GCVT_ARG_FLOAT : ob_gcvt_arg_type::OB_GCVT_ARG_DOUBLE, + buf_size, start, NULL); + if (OB_FAIL(format_str.set_length(format_str.length() + len))) { + OB_LOG(WARN, "fail to set format_str len", K(ret), K(format_str.length()), K(len)); + } + } + break; + } + case ObDecimalIntType : { + int64_t pos = 0; + char tmp_buf[ObFastFormatInt::MAX_DIGITS10_STR_SIZE] = {0}; + if (OB_FAIL(wide::to_string(reinterpret_cast(&data_[i]), sizeof(data_[i]), scale_, + tmp_buf, ObFastFormatInt::MAX_DIGITS10_STR_SIZE, pos))) { + OB_LOG(WARN, "fail to format decimal int to string", K(ret), K(data_[i]), K(print_size)); + } else if (OB_FAIL(format_str.append(tmp_buf, pos))) { + OB_LOG(WARN, "fail to append decimal int to buffer", K(ret), K(data_[i]), K(print_size)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected element type", K(ret), K(basic_type->basic_meta_.get_obj_type())); + } + } + } + } + } + if (OB_SUCC(ret) && OB_FAIL(format_str.append("]"))) { + OB_LOG(WARN, "fail to append ]", K(ret)); + } + 
return ret; + } + + + int32_t get_data_binary_len() + { + if (this->data_container_ == NULL) { + return this->length_ * sizeof(uint8_t) + this->length_ * sizeof(T); + } + return sizeof(uint8_t) * this->data_container_->null_bitmaps_.size() + + sizeof(T) * this->data_container_->raw_data_.size(); + } + int get_data_binary(char *res_buf, int64_t buf_len) + { + int ret = OB_SUCCESS; + int64_t pos = 0; + if (get_data_binary_len() > buf_len) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "buf len isn't enough", K(ret), K(buf_len), K(pos)); + } else if (this->data_container_ == NULL) { + MEMCPY(res_buf + pos, this->null_bitmaps_, sizeof(uint8_t) * this->length_); + pos += sizeof(uint8_t) * this->length_; + MEMCPY(res_buf + pos, this->data_, sizeof(T) * this->length_); + } else { + MEMCPY(res_buf + pos, reinterpret_cast(this->data_container_->null_bitmaps_.get_data()), sizeof(uint8_t) * this->data_container_->null_bitmaps_.size()); + pos += sizeof(uint8_t) * this->data_container_->null_bitmaps_.size(); + MEMCPY(res_buf + pos, reinterpret_cast(this->data_container_->raw_data_.get_data()), sizeof(T) * this->data_container_->raw_data_.size()); + } + return ret; + } + int32_t get_raw_binary_len() { return sizeof(this->length_) + get_data_binary_len(); } + int get_raw_binary(char *res_buf, int64_t buf_len) + { + int ret = OB_SUCCESS; + int64_t pos = 0; + if (get_raw_binary_len() > buf_len) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "buf len isn't enough", K(ret), K(buf_len), K(pos)); + } else { + MEMCPY(res_buf + pos, &this->length_, sizeof(this->length_)); + pos += sizeof(this->length_); + if (OB_FAIL(get_data_binary(res_buf + pos, buf_len - pos))) { + OB_LOG(WARN, "get data binary failed", K(ret), K(buf_len), K(pos)); + } + } + return ret; + } + + int init() + { + int ret = OB_SUCCESS; + if (OB_ISNULL(this->data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else { + this->length_ = 
this->data_container_->raw_data_.size(); + data_ = this->data_container_->raw_data_.get_data(); + this->null_bitmaps_ = this->data_container_->null_bitmaps_.get_data(); + } + return ret; + } + + int init(ObString &raw_data) + { + int ret = OB_SUCCESS; + int64_t pos = 0; + char *raw_str = raw_data.ptr(); + if (raw_data.length() < sizeof(this->length_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "raw data len is invalid", K(ret), K(raw_data.length())); + } else { + this->length_ = *reinterpret_cast(raw_str); + pos += sizeof(this->length_); + this->null_bitmaps_ = reinterpret_cast(raw_str + pos); + if (pos + sizeof(uint8_t) * this->length_ > raw_data.length()) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "raw data len is invalid", K(ret), K(pos), K(this->length_), K(raw_data.length())); + } else { + pos += sizeof(uint8_t) * this->length_; + data_ = reinterpret_cast(raw_str + pos); + if (pos + sizeof(T) * this->length_ > raw_data.length()) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "raw data len is invalid", K(ret), K(pos), K(this->length_), K(raw_data.length())); + } + } + } + return ret; + } + + int init(ObDatum *attrs, uint32_t attr_count, bool with_length = true) + { + int ret = OB_SUCCESS; + const uint32_t count = with_length ? 
3 : 2; + if (attr_count != count) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected attrs", K(ret), K(attr_count), K(count)); + } else { + uint32_t idx = 0; + if (with_length) { + this->length_ = attrs[idx++].get_uint32(); + } else { + this->length_ = attrs[0].get_int_bytes() / sizeof(uint8_t); + } + this->null_bitmaps_ = const_cast(reinterpret_cast(attrs[idx++].get_string().ptr())); + data_ = const_cast(reinterpret_cast(attrs[idx++].get_string().ptr())); + if ((with_length && (this->length_ != attrs[1].get_int_bytes() / sizeof(uint8_t) || this->length_ != attrs[2].get_int_bytes() / sizeof(T))) + || (!with_length && (this->length_ != attrs[1].get_int_bytes() / sizeof(T)))) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected attrs", K(ret), K(with_length), K(this->length_)); + } + } + return ret; + } + int insert_from(const ObIArrayType &src, uint32_t begin, uint32_t len) + { + int ret = OB_SUCCESS; + if (src.get_format() != get_format() + || src.get_element_type() != this->element_type_) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "inconsistent array type", K(ret), K(src.get_format()), K(src.get_element_type()), + K(get_format()), K(this->element_type_)); + } else if (OB_ISNULL(this->data_container_)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "try to modify read-only array", K(ret)); + } else { + const uint32_t src_data_offset = begin * sizeof(T); + const uint32_t src_null_offset = begin * sizeof(uint8_t); + int64_t curr_pos = this->data_container_->raw_data_.size(); + int64_t capacity = curr_pos + len; + this->data_container_->raw_data_.prepare_allocate(capacity); + char *cur_data = reinterpret_cast(this->data_container_->raw_data_.get_data() + curr_pos); + MEMCPY(cur_data, src.get_data() + src_data_offset, len * sizeof(T)); + // insert nullbitmaps + curr_pos = this->data_container_->null_bitmaps_.size(); + capacity = curr_pos + len; + this->data_container_->null_bitmaps_.prepare_allocate(capacity); + uint8_t *cur_null_bitmap = 
this->data_container_->null_bitmaps_.get_data() + curr_pos; + MEMCPY(cur_null_bitmap, src.get_nullbitmap() + src_null_offset, len * sizeof(uint8_t)); + this->length_ += len; + } + return ret; + } + int at(uint32_t idx, ObIArrayType &dest) { return OB_NOT_SUPPORTED; } + void clear() + { + data_ = nullptr; + this->null_bitmaps_ = nullptr; + this->length_ = 0; + if (OB_NOT_NULL(this->data_container_)) { + this->data_container_->clear(); + } + } + int flatten(ObArrayAttr *attrs, uint32_t attr_count, uint32_t &attr_idx) + { + int ret = OB_SUCCESS; + const uint32_t len = 2; + if (len + attr_idx >= attr_count) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected attr count", K(ret), K(attr_count), K(attr_idx), K(len)); + } else { + attrs[attr_idx].ptr_ = reinterpret_cast(this->null_bitmaps_); + attrs[attr_idx].length_ = sizeof(uint8_t) * this->length_; + attr_idx++; + attrs[attr_idx].ptr_ = reinterpret_cast(data_); + attrs[attr_idx].length_ = sizeof(T) * this->length_; + attr_idx++; + } + return ret; + } + + int compare_at(uint32_t left_begin, uint32_t left_len, uint32_t right_begin, uint32_t right_len, + const ObIArrayType &right, int &cmp_ret) + { + int ret = OB_SUCCESS; + const ObArrayFixedSize *right_data = dynamic_cast *>(&right); + if (OB_ISNULL(right_data)) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + OB_LOG(WARN, "invalid array type", K(ret), K(right.get_format()), K(this->get_format())); + } else { + uint32_t cmp_len = std::min(left_len, right_len); + cmp_ret = 0; + for (uint32_t i = 0; i < cmp_len && !cmp_ret; ++i) { + if (this->is_null(left_begin + i) && !right.is_null(right_begin + i)) { + cmp_ret = 1; + } else if (!this->is_null(left_begin + i) && right.is_null(right_begin + i)) { + cmp_ret = -1; + } else if (this->is_null(left_begin + i) && right.is_null(right_begin + i)) { + } else if (this->data_[left_begin + i] != (*right_data)[right_begin + i]) { + cmp_ret = this->data_[left_begin + i] > (*right_data)[right_begin + i] ? 
1 : -1; + } + } + if (cmp_ret == 0 && left_len != right_len) { + cmp_ret = left_len > right_len ? 1 : -1; + } + } + return ret; + } + + int compare(const ObIArrayType &right, int &cmp_ret) + { + return compare_at(0, this->length_, 0, right.size(), right, cmp_ret); + } + template + int contains(const Elem_Type &elem, bool &bret) const + { + int ret = OB_SUCCESS; + bret = false; + for (uint32_t i = 0; i < this->length_ && !bret; ++i) { + if (this->is_null(i)) { + } else if (static_cast(this->data_[i]) == elem) { + bret = true; + } + } + return ret; + } + template <> + int contains(const ObString &elem, bool &bret) const + { + return OB_INVALID_ARGUMENT; + } + template <> + int contains(const ObIArrayType &elem, bool &bret) const + { + return OB_INVALID_ARGUMENT; + } + +private : + T *data_; + int16_t scale_; // only for decimalint type +}; + +class ObVectorData : public ObArrayBase { +public : + ObVectorData() : ObArrayBase(), data_(nullptr) {} + ObVectorData(uint32_t length, float *data) + : ObArrayBase(length, ObFloatType, nullptr), + data_(data) {} + float operator[](const int64_t i) const { return data_[i]; } + ArrayFormat get_format() const { return ArrayFormat::Vector; } + int push_back(float value); + void set_scale(ObScale scale) { UNUSED(scale); } + int print(const ObCollectionTypeBase *elem_type, ObStringBuffer &format_str, + uint32_t begin = 0, uint32_t print_size = 0) const; + uint32_t *get_offsets() const { return nullptr; } + char *get_data() const { return reinterpret_cast(data_);} + int32_t get_raw_binary_len() + { + return this->data_container_ == NULL ? 
(this->length_ * sizeof(float)) : (sizeof(float) * data_container_->raw_data_.size()); + } + int get_raw_binary(char *res_buf, int64_t buf_len); + int32_t get_data_binary_len() { return get_raw_binary_len(); } + int get_data_binary(char *res_buf, int64_t buf_len) { return get_raw_binary(res_buf, buf_len); } + int init (); + int init(ObString &raw_data); + int init(ObDatum *attrs, uint32_t attr_count, bool with_length = true); + int check_validity(const ObCollectionArrayType &arr_type, const ObIArrayType &array) const; + int push_null() { return OB_ERR_NULL_VALUE; } + int insert_from(const ObIArrayType &src, uint32_t begin, uint32_t len); + int at(uint32_t idx, ObIArrayType &dest) { return OB_NOT_SUPPORTED; } + void clear(); + int flatten(ObArrayAttr *attrs, uint32_t attr_count, uint32_t &attr_idx); + int compare_at(uint32_t left_begin, uint32_t left_len, uint32_t right_begin, uint32_t right_len, + const ObIArrayType &right, int &cmp_ret); + int compare(const ObIArrayType &right, int &cmp_ret); + template + int contains(const Elem_Type &elem, bool &bret) const + { + int ret = OB_SUCCESS; + bret = false; + for (uint32_t i = 0; i < length_ && !bret; ++i) { + if (static_cast(data_[i]) == elem) { + bret = true; + } + } + return ret; + } + template <> + int contains(const ObString &elem, bool &bret) const + { + return OB_INVALID_ARGUMENT; + } + template <> + int contains(const ObIArrayType &elem, bool &bret) const + { + return OB_INVALID_ARGUMENT; + } + +private : + float *data_; +}; + +class ObArrayBinary : public ObArrayBase { +public : + ObArrayBinary() : ObArrayBase(), offsets_(nullptr), data_(nullptr) {} + ObArrayBinary(uint32_t length, int32_t elem_type, uint8_t *null_bitmaps, uint32_t *offsets, char *data) + : ObArrayBase(length, elem_type, null_bitmaps), + offsets_(offsets), data_(data) {} + ObString operator[](const int64_t i) const; + ArrayFormat get_format() const { return ArrayFormat::Binary_Varlen; } + uint32_t *get_offsets() const { return offsets_; } + 
char *get_data() const { return data_;} + int push_back(const ObString &value, bool is_null = false); + void set_scale(ObScale scale) { UNUSED(scale); } + int print(const ObCollectionTypeBase *elem_type, ObStringBuffer &format_str, + uint32_t begin = 0, uint32_t print_size = 0) const; +
+  // Size in bytes of the serialized array without the leading length_ field:
+  // null flags + offsets + string payload.
+  int32_t get_data_binary_len()
+  {
+    int32_t len = 0;
+    if (this->data_container_ == NULL) {
+      // Read-only view: the last offset is the total payload size. An empty
+      // array has no offset entries at all, so offsets_ must not be read
+      // (it may be null or point just past the end of the source buffer).
+      const uint32_t payload_len = this->length_ > 0 ? this->offsets_[this->length_ - 1] : 0;
+      len = this->length_ * sizeof(uint8_t) + this->length_ * sizeof(uint32_t) + payload_len;
+    } else {
+      len = sizeof(uint8_t) * data_container_->null_bitmaps_.size()
+                  + sizeof(uint32_t) * data_container_->offsets_.size()
+                  + data_container_->raw_data_.size();
+    }
+    return len;
+  }
+ int get_data_binary(char *res_buf, int64_t buf_len); + int32_t get_raw_binary_len() { return sizeof(length_) + get_data_binary_len(); } + int get_raw_binary(char *res_buf, int64_t buf_len); + int init(); + int init(ObString &raw_data); + int init(ObDatum *attrs, uint32_t attr_count, bool with_length = true); + int check_validity(const ObCollectionArrayType &arr_type, const ObIArrayType &array) const { return OB_SUCCESS; } + int push_null(); + int insert_from(const ObIArrayType &src, uint32_t begin, uint32_t len); + int at(uint32_t idx, ObIArrayType &dest) { return OB_NOT_SUPPORTED; } + void clear(); + int flatten(ObArrayAttr *attrs, uint32_t attr_count, uint32_t &attr_idx); + int compare(const ObIArrayType &right, int &cmp_ret); + int compare_at(uint32_t left_begin, uint32_t left_len, uint32_t right_begin, uint32_t right_len, + const ObIArrayType &right, int &cmp_ret); + template + int contains(const T &elem, bool &bret) const + { + int ret = OB_SUCCESS; + bret = false; + const ObString *str = nullptr; + if (typeid(T) != typeid(ObString)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "invalid data type", K(ret)); + } else { + str = reinterpret_cast(&elem); + for (int i = 0; i < length_ && !bret; i++) { + if ((*this)[i].compare(*str) ==
0) { + bret = true; + } + } + } + return ret; + } + +private : + uint32_t *offsets_; + char *data_; +}; + +class ObArrayNested : public ObArrayBase { +public : + ObArrayNested() : ObArrayBase(), offsets_(nullptr), data_(nullptr) {} + ObArrayNested(uint32_t length, int32_t elem_type, uint8_t *null_bitmaps, uint32_t *offsets, ObArrayBase *data) + : ObArrayBase(length, elem_type, null_bitmaps), + offsets_(offsets), data_(data) {} + ArrayFormat get_format() const { return ArrayFormat::Nested_Array; } + uint32_t *get_offsets() const { return offsets_; } + char *get_data() const { return data_->get_data();} + void set_scale(ObScale scale) { UNUSED(scale); } + int print(const ObCollectionTypeBase *elem_type, ObStringBuffer &format_str, + uint32_t begin = 0, uint32_t print_size = 0) const; + + int32_t get_data_binary_len() + { + return this->data_container_ == NULL ? (this->length_ * sizeof(uint8_t) + this->length_ * sizeof(uint32_t) + data_->get_data_binary_len()) + : (sizeof(uint8_t) * data_container_->null_bitmaps_.size() + + sizeof(uint32_t) * data_container_->offsets_.size() + + data_->get_data_binary_len()); + } + int get_data_binary(char *res_buf, int64_t buf_len); + + int32_t get_raw_binary_len() { return sizeof(length_) + get_data_binary_len(); } + int get_raw_binary(char *res_buf, int64_t buf_len); + int init(); + int init(ObString &raw_data); + int init(ObDatum *attrs, uint32_t attr_count, bool with_length = true); + int check_validity(const ObCollectionArrayType &arr_type, const ObIArrayType &array) const { return OB_SUCCESS; } + int push_null(); + inline void set_child_array(ObIArrayType *child) { data_ = static_cast(child);} + inline ObIArrayType *get_child_array() const { return static_cast(data_);} + int insert_from(const ObIArrayType &src, uint32_t begin, uint32_t len); + int push_back(const ObIArrayType &src, bool is_null = false); + int at(uint32_t idx, ObIArrayType &dest); + void clear(); + int flatten(ObArrayAttr *attrs, uint32_t attr_count, uint32_t 
&attr_idx); + int compare_at(uint32_t left_begin, uint32_t left_len, uint32_t right_begin, uint32_t right_len, + const ObIArrayType &right, int &cmp_ret); + int compare(const ObIArrayType &right, int &cmp_ret); + template + int contains(const T &elem, bool &bret) const + { + int ret = OB_SUCCESS; + bret = false; + const ObIArrayType *elem_ptr = NULL; + if (typeid(T) != typeid(ObIArrayType)) { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "invalid data type", K(ret)); + } else { + elem_ptr = reinterpret_cast(&elem); + } + for (uint32_t i = 0; i < length_ && !bret && OB_SUCC(ret); ++i) { + if (this->is_null(i)) { + } else { + uint32_t l_start = offset_at(i, get_offsets()); + uint32_t l_child_len = get_offsets()[i] - l_start; + int cmp_ret = 0; + if (OB_FAIL(get_child_array()->compare_at(l_start, l_child_len, 0, elem_ptr->size(), *elem_ptr, cmp_ret))) { + OB_LOG(WARN, "failed to do nested array contains", K(ret)); + } else if (cmp_ret == 0) { + bret = true; + } + } + } + return ret; + } + +private : + uint32_t *offsets_; + ObArrayBase *data_; + // data_container: only maintain null_bitmaps_ and offsets_, raw_data is in the child element +}; + +class ObArrayTypeObjFactory +{ +public: + ObArrayTypeObjFactory() {}; + virtual ~ObArrayTypeObjFactory() {}; + static int construct(common::ObIAllocator &alloc, const ObCollectionTypeBase &array_meta, ObIArrayType *&arr_obj, bool read_only = false); +private: + DISALLOW_COPY_AND_ASSIGN(ObArrayTypeObjFactory); +}; + + +#define FIXED_ARRAY_OBJ_CONTAINS(Element_Type) \ + const ObArrayFixedSize *arr_ptr = static_cast *>(&array); \ + if (OB_FAIL(arr_ptr->contains(elem, bret))) { \ + OB_LOG(WARN, "array contains failed", K(ret)); \ + } +class ObArrayUtil +{ +public : + static int get_type_name(const ObDataType &elem_type, char *buf, int buf_len, uint32_t depth = 1); + static int push_back_decimal_int(const ObPrecision prec, const ObDecimalInt *dec_val, bool is_null, ObIArrayType *arr_obj); + template + static int contains(const 
ObIArrayType &array, const Elem_Type &elem, bool &bret)
+  {
+    // Dispatches on the array's storage format (and, for fixed-size arrays, on
+    // the element type) to the matching concrete contains() implementation.
+    // NOTE(review): the cast targets below were restored from context because
+    // the angle-bracket arguments were lost in this copy - confirm.
+    int ret = OB_SUCCESS;
+    const ObObjType elem_obj_type = static_cast<ObObjType>(array.get_element_type());
+    switch (array.get_format()) {
+      case ArrayFormat::Fixed_Size :
+        if (elem_obj_type == ObTinyIntType) {
+          FIXED_ARRAY_OBJ_CONTAINS(int8_t);
+        } else if (elem_obj_type == ObSmallIntType) {
+          FIXED_ARRAY_OBJ_CONTAINS(int16_t);
+        } else if (elem_obj_type == ObIntType) {
+          FIXED_ARRAY_OBJ_CONTAINS(int64_t);
+        } else if (elem_obj_type == ObInt32Type) {
+          FIXED_ARRAY_OBJ_CONTAINS(int32_t);
+        } else if (elem_obj_type == ObUTinyIntType) {
+          FIXED_ARRAY_OBJ_CONTAINS(uint8_t);
+        } else if (elem_obj_type == ObUSmallIntType) {
+          FIXED_ARRAY_OBJ_CONTAINS(uint16_t);
+        } else if (elem_obj_type == ObUInt64Type) {
+          // Unsigned 64-bit arrays must be viewed as uint64_t, like every other
+          // branch matches its element type; the former int64_t view compared
+          // values above INT64_MAX as negative numbers.
+          FIXED_ARRAY_OBJ_CONTAINS(uint64_t);
+        } else if (elem_obj_type == ObUInt32Type) {
+          FIXED_ARRAY_OBJ_CONTAINS(uint32_t);
+        } else if (elem_obj_type == ObFloatType) {
+          FIXED_ARRAY_OBJ_CONTAINS(float);
+        } else if (elem_obj_type == ObDoubleType) {
+          FIXED_ARRAY_OBJ_CONTAINS(double);
+        } else {
+          ret = OB_ERR_UNEXPECTED;
+          OB_LOG(WARN, "invalid array type", K(ret), K(array.get_element_type()));
+        }
+        break;
+      case ArrayFormat::Binary_Varlen :
+        if (OB_FAIL(static_cast<const ObArrayBinary *>(&array)->contains(elem, bret))) {
+          OB_LOG(WARN, "failed to do array contains", K(ret));
+        }
+        break;
+      case ArrayFormat::Vector :
+        if (OB_FAIL(static_cast<const ObVectorData *>(&array)->contains(elem, bret))) {
+          OB_LOG(WARN, "failed to do array contains", K(ret));
+        }
+        break;
+      case ArrayFormat::Nested_Array :
+        if (OB_FAIL(static_cast<const ObArrayNested *>(&array)->contains(elem, bret))) {
+          OB_LOG(WARN, "failed to do array contains", K(ret));
+        }
+        break;
+      default:
+        ret = OB_ERR_UNEXPECTED;
+        OB_LOG(WARN, "invalid array type", K(ret), K(array.get_format()));
+        break;
+    }
+    return ret;
+  }
+  static int
convert_collection_bin_to_string(const ObString &collection_bin, + const common::ObIArray &extended_type_info, + common::ObIAllocator &allocator, + ObString &res_str); + static int get_mysql_type(const common::ObIArray &extended_type_info, + obmysql::EMySQLFieldType &type); +}; +#undef FIXED_ARRAY_OBJ_CONTAINS + +} // namespace common +} // namespace oceanbase +#endif // OCEANBASE_OB_ARRAY_TYPE_ diff --git a/deps/oblib/src/lib/udt/ob_collection_type.cpp b/deps/oblib/src/lib/udt/ob_collection_type.cpp new file mode 100644 index 0000000000..ea9cc01a01 --- /dev/null +++ b/deps/oblib/src/lib/udt/ob_collection_type.cpp @@ -0,0 +1,492 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX LIB +#include +#include "ob_collection_type.h" + +namespace oceanbase { +namespace common { + +
+// Writes type_id_ followed by the raw bytes of basic_meta_ at buf + pos and
+// advances pos. Fails without touching buf or pos when the remaining space is
+// smaller than get_serialize_size().
+int ObCollectionBasicType::serialize(char *buf, const int64_t buf_len, int64_t &pos) const
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(buf) || pos < 0 || buf_len - pos < get_serialize_size()) {
+    ret = OB_SIZE_OVERFLOW;
+    LOG_WARN("buffer is too small for basic collection type", K(ret), K(buf_len), K(pos));
+  } else {
+    // MEMCPY writes the same bytes as the former type-punned store, without
+    // requiring buf + pos to be aligned for type_id_.
+    MEMCPY(buf + pos, &type_id_, sizeof(type_id_));
+    pos += sizeof(type_id_);
+    MEMCPY(buf + pos, reinterpret_cast<const char *>(&basic_meta_), sizeof(basic_meta_));
+    pos += sizeof(basic_meta_);
+  }
+  return ret;
+}
+
+// Reads type_id_ and the raw bytes of basic_meta_ from buf + pos and advances
+// pos. Fails without reading when fewer than get_serialize_size() bytes remain.
+int ObCollectionBasicType::deserialize(const char *buf, const int64_t data_len, int64_t &pos)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(buf) || pos < 0 || data_len - pos < get_serialize_size()) {
+    ret = OB_DESERIALIZE_ERROR;
+    LOG_WARN("buffer is too short for basic collection type", K(ret), K(data_len), K(pos));
+  } else {
+    MEMCPY(&type_id_, buf + pos, sizeof(type_id_));
+    pos += sizeof(type_id_);
+    MEMCPY(reinterpret_cast<char *>(&basic_meta_), buf + pos, sizeof(basic_meta_));
+    pos += sizeof(basic_meta_);
+  }
+  return ret;
+}
+ +int64_t ObCollectionBasicType::get_serialize_size() const +{ + int64_t len = 0; + len += sizeof(type_id_); + len += sizeof(basic_meta_); + return len; +} + +int ObCollectionBasicType::deep_copy(ObIAllocator &allocator, ObCollectionTypeBase *&dst) const +{ + int ret = OB_SUCCESS; + ObCollectionBasicType *buf = OB_NEWx(ObCollectionBasicType, &allocator); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc collection basic type memory failed", K(ret)); + } else { + buf->type_id_ = type_id_; + buf->basic_meta_ = basic_meta_; + dst = buf; + } + return ret; +} + +bool ObCollectionBasicType::has_same_super_type(const ObCollectionBasicType &other) const +{ + bool bret = false; + if (get_compatiable_type_id() != other.get_compatiable_type_id()) { + } else if (basic_meta_.meta_ != other.basic_meta_.meta_) { + if (ob_is_null(basic_meta_.meta_.get_type()) || ob_is_null(other.basic_meta_.meta_.get_type())) { + bret = true; + } else if (ob_is_numeric_type(basic_meta_.meta_.get_type()) && ob_is_numeric_type(other.basic_meta_.meta_.get_type())) { + bret = true; + } + } else { + bret = true; + } + return bret; +} + +int ObCollectionArrayType::serialize(char *buf, const int64_t buf_len, int64_t &pos) const +{ + int
ret = OB_SUCCESS; + if (OB_ISNULL(element_type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid array type for serialize", K(ret)); + } else { + LST_DO_CODE(OB_UNIS_ENCODE, type_id_); + LST_DO_CODE(OB_UNIS_ENCODE, dim_cnt_); + if (OB_FAIL(element_type_->serialize(buf, buf_len, pos))) { + LOG_WARN("serialize array element type failed", K(ret)); + } + } + return ret; +} + +int ObCollectionArrayType::deserialize(const char *buf, const int64_t data_len, int64_t &pos) +{ + int ret = OB_SUCCESS; + LST_DO_CODE(OB_UNIS_DECODE, type_id_); + LST_DO_CODE(OB_UNIS_DECODE, dim_cnt_); + if (OB_FAIL(ObSqlCollectionInfo::collection_type_deserialize(allocator_, buf, data_len, pos, element_type_))) { + LOG_WARN("deserialize element type failed", K(ret)); + } + return ret; +} + +int64_t ObCollectionArrayType::get_serialize_size() const +{ + int64_t len = 0; + LST_DO_CODE(OB_UNIS_ADD_LEN, type_id_); + LST_DO_CODE(OB_UNIS_ADD_LEN, dim_cnt_); + len += element_type_->get_serialize_size(); + return len; +} + +int ObCollectionArrayType::deep_copy(ObIAllocator &allocator, ObCollectionTypeBase *&dst) const +{ + int ret = OB_SUCCESS; + ObCollectionArrayType *buf = OB_NEWx(ObCollectionArrayType, &allocator, allocator); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc collection arry type memory failed", K(ret)); + } else if (OB_FAIL(element_type_->deep_copy(allocator, buf->element_type_))) { + LOG_WARN("do element type deep copy failed", K(ret)); + } else { + buf->type_id_ = type_id_; + buf->dim_cnt_ = dim_cnt_; + dst = buf; + } + return ret; +} + +bool ObCollectionArrayType::has_same_super_type(const ObCollectionArrayType &other) const +{ + bool bret = false; + if (type_id_ == ObNestedType::OB_VECTOR_TYPE + && other.type_id_ == ObNestedType::OB_VECTOR_TYPE && dim_cnt_ != other.dim_cnt_ ) { + // return false + } else if (get_compatiable_type_id() != other.get_compatiable_type_id()) { + // return false + } else if (OB_NOT_NULL(element_type_) && 
OB_NOT_NULL(other.element_type_)) { + if (element_type_->type_id_ != other.element_type_->type_id_) { + } else if (element_type_->type_id_ == ObNestedType::OB_BASIC_TYPE) { + bret = static_cast(element_type_)->has_same_super_type(*static_cast(other.element_type_)); + } else { + bret = static_cast(element_type_)->has_same_super_type(*static_cast(other.element_type_)); + } + } + return bret; +} +
+// Layout: name_len_ (raw bytes) | name_def_ (name_len_ bytes) | collection meta.
+OB_DEF_SERIALIZE(ObSqlCollectionInfo)
+{
+  int ret = OB_SUCCESS;
+  if (name_len_ <= 0 || OB_ISNULL(name_def_) || OB_ISNULL(collection_meta_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("invalid udt name length for serialize", K(ret), K(*this));
+  } else if (OB_ISNULL(buf) || pos < 0
+             || buf_len - pos < static_cast<int64_t>(sizeof(name_len_)) + static_cast<int64_t>(name_len_)) {
+    // The length prefix and the name are copied raw, so the space has to be
+    // verified here; nothing downstream checks it.
+    ret = OB_SIZE_OVERFLOW;
+    LOG_WARN("buffer is too small for udt name", K(ret), K(buf_len), K(pos));
+  } else {
+    // MEMCPY writes the same bytes as a direct store, without requiring
+    // buf + pos to be aligned for name_len_.
+    MEMCPY(buf + pos, &name_len_, sizeof(name_len_));
+    pos += sizeof(name_len_);
+    MEMCPY(buf + pos, name_def_, name_len_);
+    pos += name_len_;
+    if (OB_FAIL(collection_meta_->serialize(buf, buf_len, pos))) {
+      LOG_WARN("serialize collection meta failed", K(ret), K(*this));
+    }
+  }
+  return ret;
+}
+
+// Inverse of the serializer above. name_def_ points into `buf`; it is not copied.
+OB_DEF_DESERIALIZE(ObSqlCollectionInfo)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(buf) || pos < 0 || data_len - pos < static_cast<int64_t>(sizeof(name_len_))) {
+    ret = OB_DESERIALIZE_ERROR;
+    LOG_WARN("buffer is too short for udt name length", K(ret), K(pos), K(data_len));
+  } else {
+    MEMCPY(&name_len_, buf + pos, sizeof(name_len_));
+    pos += sizeof(name_len_);
+    // The whole name must lie inside the buffer, not just its first byte.
+    if (name_len_ <= 0 || data_len - pos < static_cast<int64_t>(name_len_)) {
+      ret = OB_DESERIALIZE_ERROR;
+      LOG_WARN("invalid udt name length for deseriazlie", K(ret), K(*this), K(pos), K(data_len));
+    } else {
+      name_def_ = buf + pos;
+      pos += name_len_;
+      if (OB_FAIL(collection_type_deserialize(allocator_, buf, data_len, pos, collection_meta_))) {
+        LOG_WARN("deserialize collection meta failed", K(ret), K(*this));
+      }
+    }
+  }
+  return ret;
+}
+
+// serialize size cannot return error code
+OB_DEF_SERIALIZE_SIZE(ObSqlCollectionInfo)
+{
+  int64_t len = 0;
+  len += sizeof(name_len_);
+  len += name_len_;
+  // A missing meta cannot be reported here; the serializer rejects that state.
+  if (OB_NOT_NULL(collection_meta_)) {
+    len += collection_meta_->get_serialize_size();
+  }
+  return len;
+}
+ +int ObSqlCollectionInfo::deep_copy(ObIAllocator &allocator, ObSqlCollectionInfo *&dst) const +{ + int ret = OB_SUCCESS; + ObSqlCollectionInfo *buf = OB_NEWx(ObSqlCollectionInfo, &allocator, allocator); + char *copy_name; + if
(OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc collection info memory failed", K(ret)); + } else if (OB_ISNULL(copy_name = static_cast(allocator.alloc(name_len_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc collection type name failed", K(ret)); + } else if (OB_FAIL(collection_meta_->deep_copy(allocator, buf->collection_meta_))) { + LOG_WARN("do element type deep copy failed", K(ret)); + } else { + MEMCPY(copy_name, name_def_, name_len_); + ObString tmp_name(name_len_, copy_name); + buf->set_name(tmp_name); + dst = buf; + } + + return ret; +} + +int ObSqlCollectionInfo::collection_type_deserialize(ObIAllocator &allocator, const char* buf, const int64_t data_len, + int64_t& pos, ObCollectionTypeBase *&collection_meta) +{ + int ret = OB_SUCCESS; + int64_t new_pos = pos; + uint16_t type_id_tmp = 0; + LST_DO_CODE(OB_UNIS_DECODE, type_id_tmp); + pos = new_pos; + if (OB_FAIL(ret)) { + } else if (type_id_tmp == ObNestedType::OB_BASIC_TYPE) { + if (OB_ISNULL(collection_meta = OB_NEWx(ObCollectionBasicType, &allocator))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc element type failed", K(ret)); + } else if (OB_FAIL(collection_meta->deserialize(buf, data_len, pos))) { + LOG_WARN("deserialize element type failed", K(ret)); + } + } else if (type_id_tmp == ObNestedType::OB_ARRAY_TYPE + || type_id_tmp == ObNestedType::OB_VECTOR_TYPE) { + if (OB_ISNULL(collection_meta = OB_NEWx(ObCollectionArrayType, &allocator, allocator))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc element type failed", K(ret)); + } else if (OB_FAIL(collection_meta->deserialize(buf, data_len, pos))) { + LOG_WARN("deserialize element type failed", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid array type for serialize", K(ret), K(type_id_tmp)); + } + return ret; +} + +int ObSqlCollectionInfo::set_element_meta_unsigned(ObCollectionBasicType *meta_info) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(meta_info) || meta_info->type_id_ 
!= ObNestedType::OB_BASIC_TYPE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid meta info", K(ret), K(meta_info)); + } else { + ObObjType obj_type = meta_info->basic_meta_.get_obj_type(); + switch (obj_type) { + case ObTinyIntType: + meta_info->basic_meta_.meta_.set_utinyint(); + break; + case ObSmallIntType: + meta_info->basic_meta_.meta_.set_usmallint(); + break; + case ObInt32Type: + meta_info->basic_meta_.meta_.set_uint32(); + break; + case ObIntType: + meta_info->basic_meta_.meta_.set_uint64(); + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid meta info", K(ret), K(meta_info)); + } + } + return ret; +} + +int ObSqlCollectionInfo::set_element_meta_info(const std::string &name, uint8_t meta_attr_idx, ObCollectionBasicType *meta_info) +{ + /* meta_attr_idx = 0 value is precision + meta_attr_idx = 1 value is scale + */ + int ret = OB_SUCCESS; + int32_t val = std::stoi(name); + if (OB_ISNULL(meta_info) || meta_info->type_id_ != ObNestedType::OB_BASIC_TYPE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid meta info", K(ret), K(meta_info)); + } else { + ObObjType obj_type = meta_info->basic_meta_.get_obj_type(); + ObObjTypeClass tc = meta_info->basic_meta_.get_type_class(); + const ObAccuracy &default_accuracy = ObAccuracy::DDL_DEFAULT_ACCURACY2[0][obj_type]; + switch (tc) { + case ObIntTC: + case ObUIntTC: + // use default precision + meta_info->basic_meta_.set_precision(default_accuracy.get_precision()); + meta_info->basic_meta_.set_scale(0); + break; + case ObFloatTC: + case ObDoubleTC: + // use default precision + meta_info->basic_meta_.set_precision(default_accuracy.get_precision()); + meta_info->basic_meta_.set_scale(default_accuracy.get_scale()); + break; + case ObStringTC: + meta_info->basic_meta_.set_length(val); + break; + case ObDecimalIntTC : + if (meta_attr_idx == 0) { + meta_info->basic_meta_.set_precision(val); + } else if (meta_attr_idx == 1) { + meta_info->basic_meta_.set_scale(val); + } else { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("unexpected meta_attr_idx", K(ret), K(meta_attr_idx)); + } + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid data type", K(ret), K(tc)); + } + } + return ret; +} + +int ObSqlCollectionInfo::create_meta_info_by_name(const std::string &name, ObCollectionTypeBase *&meta_info, uint8_t &arr_depth) +{ + int ret = OB_SUCCESS; + if (0 == name.compare("ARRAY") || 0 == name.compare("VECTOR")) { + if (OB_ISNULL(meta_info = OB_NEWx(ObCollectionArrayType, &allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to create array type meta", K(ret)); + } else { + const uint8_t OB_ARRAY_MAX_NESTED_LEVEL = 6;/* constistent with pg*/ + meta_info->type_id_ = (0 == name.compare("ARRAY")) ? + ObNestedType::OB_ARRAY_TYPE : ObNestedType::OB_VECTOR_TYPE; + if (++arr_depth > OB_ARRAY_MAX_NESTED_LEVEL) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported array depth", K(ret), K(arr_depth), K(OB_ARRAY_MAX_NESTED_LEVEL)); + } + } + } else { + if (OB_ISNULL(meta_info = OB_NEWx(ObCollectionBasicType, &allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to create basic element type meta", K(ret)); + } else { + meta_info->type_id_ = ObNestedType::OB_BASIC_TYPE; + } + } + + if (OB_FAIL(ret)) { + } else if (0 == name.compare("NULL")) { + static_cast(meta_info)->basic_meta_.meta_.set_null(); + } else if (0 == name.compare("TINYINT")) { + static_cast(meta_info)->basic_meta_.meta_.set_tinyint(); + } else if (0 == name.compare("SMALLINT")) { + static_cast(meta_info)->basic_meta_.meta_.set_smallint(); + } else if (0 == name.compare("MEDIUMINT")) { + static_cast(meta_info)->basic_meta_.meta_.set_mediumint(); + } else if (0 == name.compare("INT")) { + static_cast(meta_info)->basic_meta_.meta_.set_int32(); + } else if (0 == name.compare("BIGINT")) { + static_cast(meta_info)->basic_meta_.meta_.set_int(); + } else if (0 == name.compare("FLOAT")) { + static_cast(meta_info)->basic_meta_.meta_.set_float(); + } else if (0 == 
name.compare("DOUBLE")) { + static_cast(meta_info)->basic_meta_.meta_.set_double(); + } else if (0 == name.compare("DECIMAL")) { + static_cast(meta_info)->basic_meta_.meta_.set_number(); + } else if (0 == name.compare("DATETIME")) { + static_cast(meta_info)->basic_meta_.meta_.set_datetime(); + } else if (0 == name.compare("TIMESTAMP")) { + static_cast(meta_info)->basic_meta_.meta_.set_timestamp(); + } else if (0 == name.compare("DATE")) { + static_cast(meta_info)->basic_meta_.meta_.set_date(); + } else if (0 == name.compare("TIME")) { + static_cast(meta_info)->basic_meta_.meta_.set_time(); + } else if (0 == name.compare("YEAR")) { + static_cast(meta_info)->basic_meta_.meta_.set_year(); + } else if (0 == name.compare("VARCHAR")) { + static_cast(meta_info)->basic_meta_.meta_.set_varchar(); + // use default CS + static_cast(meta_info)->basic_meta_.set_collation_type(CS_TYPE_UTF8MB4_BIN); + static_cast(meta_info)->basic_meta_.set_collation_level(CS_LEVEL_COERCIBLE); + } else if (0 == name.compare("VARBINARY")) { + static_cast(meta_info)->basic_meta_.meta_.set_varbinary(); + } else if (0 == name.compare("CHAR")) { + static_cast(meta_info)->basic_meta_.meta_.set_char(); + } else if (0 == name.compare("BINARY")) { + static_cast(meta_info)->basic_meta_.meta_.set_binary(); + } else if (0 == name.compare("BIT")) { + static_cast(meta_info)->basic_meta_.meta_.set_bit(); + } else if (0 == name.compare("JSON")) { + static_cast(meta_info)->basic_meta_.meta_.set_json(); + } else if (0 == name.compare("GEOMETRY")) { + static_cast(meta_info)->basic_meta_.meta_.set_geometry(); + } else if (0 == name.compare("DECIMAL_INT")) { + static_cast(meta_info)->basic_meta_.meta_.set_decimal_int(); + } else if (0 == name.compare("ARRAY") || 0 == name.compare("VECTOR")) { + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get type by name failed", K(ret)); + } + + return ret; +} + +int ObSqlCollectionInfo::parse_type_info() +{ + int ret = OB_SUCCESS; + bool is_root = true; + uint8_t idx = 0; + 
ObCollectionTypeBase *curr_meta = NULL; + const std::string type_info(name_def_, name_len_); + const std::regex pattern(R"(\d+|\w+)"); + std::smatch matches; + uint8_t arr_depth = 0; + + std::string::const_iterator searchStart(type_info.cbegin()); + while (OB_SUCC(ret) && std::regex_search(searchStart, type_info.cend(), matches, pattern)) { + for (std::smatch::iterator it = matches.begin(); it != matches.end() && OB_SUCC(ret); ++it) { + const auto& match = *it; + std::string type_name = match.str(); + if (is_root) { + if (OB_FAIL(create_meta_info_by_name(type_name, collection_meta_, arr_depth))) { + LOG_WARN("get type by name failed", K(ret)); + } else { + is_root = false; + curr_meta = collection_meta_; + } + } else if (OB_NOT_NULL(curr_meta) && curr_meta->type_id_ == ObNestedType::OB_ARRAY_TYPE + && OB_FAIL(create_meta_info_by_name(type_name, static_cast(curr_meta)->element_type_, arr_depth))) { + LOG_WARN("create meta info failed", K(ret)); + } else if (OB_NOT_NULL(curr_meta) && curr_meta->type_id_ == ObNestedType::OB_ARRAY_TYPE) { + curr_meta = static_cast(curr_meta)->element_type_; + } else if (OB_NOT_NULL(curr_meta) && curr_meta->type_id_ == ObNestedType::OB_VECTOR_TYPE + && isNumber(type_name)) { + // vector element is float + std::string vector_elem = "FLOAT"; + if (OB_FAIL(create_meta_info_by_name(vector_elem, static_cast(curr_meta)->element_type_, arr_depth))) { + LOG_WARN("create meta info failed", K(ret)); + } else { + int32_t dim = std::stoi(type_name); + static_cast(curr_meta)->dim_cnt_ = dim; + } + } else if (isNumber(type_name) && OB_FAIL(set_element_meta_info(type_name, idx++, static_cast(curr_meta)))) { + LOG_WARN("set element meta info failed", K(ret)); + } else if (0 == type_name.compare("UNSIGNED") && OB_FAIL(set_element_meta_unsigned(static_cast(curr_meta)))) { + LOG_WARN("set element meta unsighed failed", K(ret)); + } + } + searchStart = matches.suffix().first; + } + return ret; +} + +bool ObSqlCollectionInfo::has_same_super_type(const 
ObSqlCollectionInfo &other) const +{ + bool b_ret = false; + if (OB_ISNULL(collection_meta_) || OB_ISNULL(other.collection_meta_)) { + // return false + } else if (collection_meta_->get_compatiable_type_id() != other.collection_meta_->get_compatiable_type_id()) { + // return false + } else { + if (collection_meta_->type_id_ == ObNestedType::OB_BASIC_TYPE) { + b_ret = static_cast(collection_meta_)->has_same_super_type(*static_cast(other.collection_meta_)); + } else { + b_ret = static_cast(collection_meta_)->has_same_super_type(*static_cast(other.collection_meta_)); + } + } + return b_ret; +} + +int ObSqlCollectionInfo::get_child_def_string(ObString &child_def) const +{ + int ret = OB_SUCCESS; + const uint32_t min_len = 7; // array() + if (name_len_ <= min_len) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(*this)); + } else { + child_def = ObString(name_len_ - min_len, name_def_ + (min_len - 1)); + } + return ret; +} + +} // namespace common +} // namespace oceanbase \ No newline at end of file diff --git a/deps/oblib/src/lib/udt/ob_collection_type.h b/deps/oblib/src/lib/udt/ob_collection_type.h new file mode 100644 index 0000000000..70153a0818 --- /dev/null +++ b/deps/oblib/src/lib/udt/ob_collection_type.h @@ -0,0 +1,123 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_OB_SQL_COLLECTION_TYPE_ +#define OCEANBASE_OB_SQL_COLLECTION_TYPE_ +#include +#include +#include "common/object/ob_object.h" +#include "lib/string/ob_string.h" +#include "lib/oblog/ob_log_module.h" +#include "common/ob_field.h" + +namespace oceanbase { +namespace common { + +enum ObNestedType { + OB_BASIC_TYPE = 0, + OB_ARRAY_TYPE = 1, + OB_VECTOR_TYPE = 2, +}; + +class ObCollectionTypeBase { +public: + PURE_VIRTUAL_NEED_SERIALIZE_AND_DESERIALIZE; + + virtual int deep_copy(ObIAllocator &allocator, ObCollectionTypeBase *&dst) const = 0; + virtual const ObDataType &get_basic_meta(uint32_t &depth) const = 0; + uint16_t get_compatiable_type_id() const { return type_id_ == OB_VECTOR_TYPE ? OB_ARRAY_TYPE : type_id_; } + uint16_t type_id_; // array/vector/map +}; + +class ObCollectionArrayType : public ObCollectionTypeBase +{ +public: + ObCollectionArrayType(ObIAllocator &allocator) : allocator_(allocator) , dim_cnt_(0), element_type_(nullptr) {} + virtual ~ObCollectionArrayType() {} + int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; + int deserialize(const char *buf, const int64_t data_len, int64_t &pos); + int64_t get_serialize_size() const; + int deep_copy(ObIAllocator &allocator, ObCollectionTypeBase *&dst) const; + bool has_same_super_type(const ObCollectionArrayType &other) const; + const ObDataType &get_basic_meta(uint32_t &depth) const { depth++; return element_type_->get_basic_meta(depth);} + + ObIAllocator &allocator_; + uint32_t dim_cnt_; // vector dimension + ObCollectionTypeBase *element_type_; +}; + +class ObCollectionBasicType : public ObCollectionTypeBase +{ +public: + int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; + int deserialize(const char *buf, const int64_t data_len, int64_t &pos); + int64_t get_serialize_size() const; + int deep_copy(ObIAllocator &allocator, ObCollectionTypeBase *&dst) const; + bool has_same_super_type(const ObCollectionBasicType &other) const; + const ObDataType 
&get_basic_meta(uint32_t &depth) const { UNUSED(depth); return basic_meta_; } + ObDataType basic_meta_; +}; + +typedef struct ObSqlCollectionInfo +{ + OB_UNIS_VERSION(1); +public: + + ObSqlCollectionInfo(ObIAllocator &allocator) + : allocator_(allocator), name_len_(0), + name_def_(nullptr), collection_meta_(nullptr) {} + virtual ~ObSqlCollectionInfo() {} + void set_name(ObString &name) + { + name_def_ = name.ptr(); + name_len_ = name.length(); + } + + bool is_same(const ObSqlCollectionInfo &other) // test only remove later + { + bool is_same = (strncmp(name_def_, other.name_def_, name_len_) == 0); + return is_same; + } + + ObString get_def_string() const {return ObString(name_len_, name_def_);} + int get_child_def_string(ObString &child_def) const; + int deep_copy(ObIAllocator &allocator, ObSqlCollectionInfo *&dst) const; + int create_meta_info_by_name(const std::string &name, ObCollectionTypeBase *&meta_info, uint8_t &arr_depth); + int set_element_meta_info(const std::string &name, uint8_t meta_attr_idx, ObCollectionBasicType *meta_info); + int set_element_meta_unsigned(ObCollectionBasicType *meta_info); + static int collection_type_deserialize(ObIAllocator &allocator, const char* buf, const int64_t data_len, int64_t& pos, + ObCollectionTypeBase *&collection_meta); + bool has_same_super_type(const ObSqlCollectionInfo &other) const; + const ObDataType &get_basic_meta(uint32_t &depth) const { depth = 0; return collection_meta_->get_basic_meta(depth); } + int parse_type_info(); + TO_STRING_KV(K(ObString(name_len_, name_def_))); + +private: + inline bool isNumber(std::string &str) { + for (int i = 0; i < str.length(); i++) { + if (!std::isdigit(str[i])) { + return false; + } + } + return !str.empty(); + } + +public: + ObIAllocator &allocator_; + size_t name_len_; + const char *name_def_; + ObCollectionTypeBase *collection_meta_; +} ObSqlCollectionInfo; + +} // namespace common +} // namespace oceanbase +#endif // OCEANBASE_OB_SQL_COLLECTION_TYPE_ diff --git 
a/deps/oblib/src/lib/vector/ob_vector_util.cpp b/deps/oblib/src/lib/vector/ob_vector_util.cpp new file mode 100644 index 0000000000..769c96adc8 --- /dev/null +++ b/deps/oblib/src/lib/vector/ob_vector_util.cpp @@ -0,0 +1,151 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_ENG + +#include "ob_vector_util.h" +#include "lib/oblog/ob_log.h" +#include "lib/oblog/ob_log_module.h" +#include "lib/string/ob_string.h" +#include + + +namespace oceanbase { +namespace common { +namespace obvectorutil { + +void ObVsagLogger::SetLevel(Level Log_level) +{ + //obvectorlib::set_log_level(Log_level); +} +void ObVsagLogger::Trace(const std::string& msg) +{ + ObString Log = ObString(msg.size(), msg.c_str()); + LOG_TRACE("[Vsag]",K(Log)); +} + +void ObVsagLogger::Debug(const std::string& msg) +{ + ObString Log = ObString(msg.size(), msg.c_str()); + LOG_TRACE("[Vsag]",K(Log)); +} + +void ObVsagLogger::Info(const std::string& msg) +{ + ObString Log = ObString(msg.size(), msg.c_str()); + LOG_TRACE("[Vsag]",K(Log)); +} + +void ObVsagLogger::Warn(const std::string& msg) +{ + int ret=0; + ObString Log = ObString(msg.size(), msg.c_str()); + LOG_WARN("[Vsag]",K(Log)); +} + +void ObVsagLogger::Error(const std::string& msg) +{ + int ret=0; + ObString Log = ObString(msg.size(), msg.c_str()); + LOG_ERROR("[Vsag]",K(Log)); +} + +void ObVsagLogger::Critical(const std::string& msg) +{ + ObString Log = ObString(msg.size(), msg.c_str()); + LOG_TRACE("[Vsag]",K(Log)); +} + +int 
init_vasg_logger(void* logger) +{ + INIT_SUCC(ret); + if (!check_vsag_init()) { + return -4016; + } else { + obvectorlib::set_logger(logger); + obvectorlib::set_log_level(static_cast(1)); + } + return 0; +} + +bool check_vsag_init() +{ + INIT_SUCC(ret); + return obvectorlib::is_init(); +} + +int create_index(obvectorlib::VectorIndexPtr& index_handler, int index_type, + const char* dtype, const char* metric, int dim, + int max_degree, int ef_construction, int ef_search, + void* allocator) +{ + INIT_SUCC(ret); + obvectorlib::set_block_size_limit(2*1024*1024); + return obvectorlib::create_index(index_handler, + static_cast(index_type), + dtype, metric, + dim, + max_degree, + ef_construction, + ef_search, + allocator); +} + +int build_index(obvectorlib::VectorIndexPtr index_handler, float* vector_list, int64_t* ids, int dim, int size) +{ + INIT_SUCC(ret); + return obvectorlib::build_index(index_handler, vector_list, ids, dim, size); +} + +int add_index(obvectorlib::VectorIndexPtr index_handler, float* vector_list, int64_t* ids, int dim, int size) +{ + INIT_SUCC(ret); + return obvectorlib::add_index(index_handler, vector_list, ids, dim, size); +} + +int get_index_number(obvectorlib::VectorIndexPtr index_handler, int64_t &size) +{ + INIT_SUCC(ret); + return obvectorlib::get_index_number(index_handler, size); +} + +int knn_search(obvectorlib::VectorIndexPtr index_handler, float* query_vector,int dim, int64_t topk, + const float*& result_dist, const int64_t*& result_ids, int64_t &result_size, int ef_search, + void* invalid) +{ + INIT_SUCC(ret); + return obvectorlib::knn_search(index_handler, query_vector, dim, topk, + result_dist, result_ids, result_size, + ef_search, invalid); +} + +int fserialize(obvectorlib::VectorIndexPtr index_handler, std::ostream& out_stream) +{ + INIT_SUCC(ret); + return obvectorlib::fserialize(index_handler, out_stream); +} + +int fdeserialize(obvectorlib::VectorIndexPtr& index_handler, std::istream& in_stream) +{ + INIT_SUCC(ret); + return 
obvectorlib::fdeserialize(index_handler,in_stream); +} + +int delete_index(obvectorlib::VectorIndexPtr& index_handler) +{ + INIT_SUCC(ret); + return obvectorlib::delete_index(index_handler); +} + +} //namespace obvectorlib +} //namespace common +} //namespace oceanbase \ No newline at end of file diff --git a/deps/oblib/src/lib/vector/ob_vector_util.h b/deps/oblib/src/lib/vector/ob_vector_util.h new file mode 100644 index 0000000000..9e2af529ac --- /dev/null +++ b/deps/oblib/src/lib/vector/ob_vector_util.h @@ -0,0 +1,73 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OB_VECTOR_UTIL_H +#define OB_VECTOR_UTIL_H +#include +#include +#include +#include +#include + +namespace oceanbase { +namespace common { +namespace obvectorutil { + +class ObVsagLogger : public vsag::Logger { + public: + void SetLevel(Level Log_level) override; + void + Trace(const std::string& msg) override; + + void + Debug(const std::string& msg) override; + + void + Info(const std::string& msg) override; + + void + Warn(const std::string& msg) override; + + void + Error(const std::string& msg) override; + + void + Critical(const std::string& msg) override; +}; + +int init_vasg_logger(void* logger); + +bool check_vsag_init(); + +int create_index(obvectorlib::VectorIndexPtr& index_handler, int index_type, + const char* dtype, const char* metric, int dim, + int max_degree, int ef_construction, int ef_search, + void* allocator = NULL); + +int build_index(obvectorlib::VectorIndexPtr index_handler, float* vector_list, int64_t* ids, int dim, int size); + +int add_index(obvectorlib::VectorIndexPtr index_handler,float* vector_list, int64_t* ids, int dim, int size); + +int get_index_number(obvectorlib::VectorIndexPtr index_handler, int64_t &size); + +int knn_search(obvectorlib::VectorIndexPtr index_handler,float* query_vector,int dim, int64_t topk, + const float*& result_dist, const int64_t*& result_ids, int64_t &result_size, int ef_search, + void* invalid = NULL); + +int fserialize(obvectorlib::VectorIndexPtr index_handler, std::ostream& out_stream); + +int fdeserialize(obvectorlib::VectorIndexPtr& index_handler, std::istream& in_stream); +int delete_index(obvectorlib::VectorIndexPtr& index_handler); +} // namesapce obvectorutil +} // namespace common +} // namespace oceanbase +#endif /* OB_VECTOR_UTIL_H */ diff --git a/deps/oblib/src/rpc/obmysql/ob_mysql_global.h b/deps/oblib/src/rpc/obmysql/ob_mysql_global.h index 57a26d659b..afad6d915d 100644 --- a/deps/oblib/src/rpc/obmysql/ob_mysql_global.h +++ b/deps/oblib/src/rpc/obmysql/ob_mysql_global.h @@ 
-361,6 +361,12 @@ inline const char *get_emysql_field_type_str(const obmysql::EMySQLFieldType &typ case obmysql::MYSQL_TYPE_ROARINGBITMAP: str = "MYSQL_TYPE_ROARINGBITMAP"; break; + case obmysql::MYSQL_TYPE_OB_VECTOR: + str = "MYSQL_TYPE_OB_VECTOR"; + break; + case obmysql::MYSQL_TYPE_OB_ARRAY: + str = "MYSQL_TYPE_OB_ARRAY"; + break; case obmysql::MYSQL_TYPE_NEWDECIMAL: str = "MYSQL_TYPE_NEWDECIMAL"; break; diff --git a/deps/oblib/src/rpc/obrpc/ob_rpc_packet_list.h b/deps/oblib/src/rpc/obrpc/ob_rpc_packet_list.h index 7b82a5cb73..8c475b15c3 100644 --- a/deps/oblib/src/rpc/obrpc/ob_rpc_packet_list.h +++ b/deps/oblib/src/rpc/obrpc/ob_rpc_packet_list.h @@ -1189,7 +1189,7 @@ PCODE_DEF(OB_CAL_STANDBY_TENANT_PHY_RESOURCE, 0x1623) //PCODE_DEF(OB_UPDATE_MVIEW_REFERENCE_TABLE_STATUS, 0x1624) //PCODE_DEF(OB_DO_EVENT_DDL, 0x1625) -PCODE_DEF(OB_GENERATE_AUX_INDEX_SCHEMA, 0x1626) +PCODE_DEF(OB_CREATE_AUX_INDEX, 0x1626) // FARM COMPAT WHITELIST //PCODE_DEF(OB_GET_SS_MACRO_BLOCK_BY_URI, 0x1627) //PCODE_DEF(OB_GET_SS_MACRO_BLOCK, 0x1628) diff --git a/mittest/mtlenv/CMakeLists.txt b/mittest/mtlenv/CMakeLists.txt index 32c986f691..1076071ace 100644 --- a/mittest/mtlenv/CMakeLists.txt +++ b/mittest/mtlenv/CMakeLists.txt @@ -4,6 +4,7 @@ storage_unittest(test_tx_data_table) #storage_unittest(test_multi_tenant test_multi_tenant.cpp) storage_unittest(test_buffer_ctx_node test_buffer_ctx_node.cpp) ob_unittest(test_htable_lock test_htable_lock.cpp) +storage_unittest(test_vector_index_adaptor test_vector_index_adaptor.cpp) add_subdirectory(storage) add_subdirectory(tablelock) \ No newline at end of file diff --git a/mittest/mtlenv/test_vector_index_adaptor.cpp b/mittest/mtlenv/test_vector_index_adaptor.cpp new file mode 100644 index 0000000000..2231a2f180 --- /dev/null +++ b/mittest/mtlenv/test_vector_index_adaptor.cpp @@ -0,0 +1,1264 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX STORAGE + +#include +#define private public +#define protected public + +#include "lib/string/ob_sql_string.h" +#include "roaring/roaring64.h" +#include "share/vector_index/ob_plugin_vector_index_adaptor.h" +#include "share/vector_index/ob_plugin_vector_index_serialize.h" +#include "storage/ob_i_store.h" +#include "mtlenv/mock_tenant_module_env.h" +#include "lib/oblog/ob_log_module.h" + +#include "lib/allocator/ob_mod_define.h" +#include "lib/allocator/page_arena.h" +#include "lib/vector/ob_vector_util.h" + +#undef private +#undef protected +#include +#include + +namespace oceanbase { + + +using namespace storage; +using namespace common; + +class TestVectorIndexAdaptor : public ::testing::Test { +public: + TestVectorIndexAdaptor() + {} + ~TestVectorIndexAdaptor() + {} + + static void SetUpTestCase() + { + EXPECT_EQ(OB_SUCCESS, MockTenantModuleEnv::get_instance().init()); + MTL(transaction::ObTransService*)->tx_desc_mgr_.tx_id_allocator_ = + [](transaction::ObTransID &tx_id) { tx_id = transaction::ObTransID(1001); return OB_SUCCESS; }; + ObServerCheckpointSlogHandler::get_instance().is_started_ = true; + } + static void TearDownTestCase() + { + MockTenantModuleEnv::get_instance().destroy(); + } + virtual void SetUp() + { + ASSERT_TRUE(MockTenantModuleEnv::get_instance().is_inited()); + + } + virtual void TearDown() + {} + +private: + // disallow copy + DISALLOW_COPY_AND_ASSIGN(TestVectorIndexAdaptor); +}; + + +void* ObArenaAllocator_malloc_adapter(size_t size, ObArenaAllocator* allocator) { + // 调用 
ObArenaAllocator 的 alloc 方法并转换 size + return allocator->alloc(static_cast(size)); +} + +ObArenaAllocator myAllocator(ObModIds::TEST); + +void* global_allocator_malloc(size_t size) { + return ObArenaAllocator_malloc_adapter(size, &myAllocator); +} + +void* global_allocator_free(void *size) { + void *res = nullptr; + return res; +} + +/* +TEST_F(TestVectorIndexAdaptor, bitmap_alloc) +{ + roaring_memory_t memory_hook; + memory_hook.malloc = &global_allocator_malloc; + roaring_init_memory_hook(memory_hook); + + roaring::api::roaring64_bitmap_t *bitmap = roaring::api::roaring64_bitmap_create(); + roaring::api::roaring64_bitmap_add(bitmap, 3); + roaring::api::roaring64_bitmap_add(bitmap, 5); + roaring::api::roaring64_bitmap_add(bitmap, 7); + + ASSERT_EQ(roaring64_bitmap_contains(bitmap, 5), true); + ASSERT_EQ(roaring64_bitmap_contains(bitmap, 20), false); +} +*/ + +/* +void *ob_bitmap_mallloc(size_t size) +{ + return oceanbase::common::ob_malloc(size, oceanbase::common::ObModIds::TEST); +} + +void *ob_bitmap_realloc(void *ptr, size_t size) +{ + ObMemAttr attr(oceanbase::common::OB_SERVER_TENANT_ID, "test"); + return oceanbase::common::ob_realloc(ptr, size, attr); +} + +void *ob_bitmap_aligned_malloc(size_t a, size_t b) +{ + ObMemAttr attr(oceanbase::common::OB_SERVER_TENANT_ID, "test"); + return oceanbase::common::ob_malloc_align(a, b, attr); +} + +void ob_bitmap_free(void *ptr) +{ + oceanbase::common::ob_free(ptr); +} + +void ob_bitmap_aligned_free(void *ptr) +{ + oceanbase::common::ob_free_align(ptr); +} + + +TEST_F(TestVectorIndexAdaptor, bitmap_malloc) +{ + roaring_memory_t memory_hook; + memory_hook.malloc = &ob_bitmap_mallloc; + memory_hook.realloc = &ob_bitmap_realloc; + memory_hook.aligned_malloc = &ob_bitmap_aligned_malloc; + memory_hook.free = &ob_bitmap_free; + memory_hook.aligned_free = &ob_bitmap_aligned_free; + roaring_init_memory_hook(memory_hook); + + roaring::api::roaring64_bitmap_t *bitmap = roaring::api::roaring64_bitmap_create(); + 
roaring::api::roaring64_bitmap_add(bitmap, 3); + + ASSERT_EQ(roaring64_bitmap_contains(bitmap, 3), true); + +} + +TEST_F(TestVectorIndexAdaptor, bitmap_flip) +{ + ObArenaAllocator allocator(ObModIds::TEST); + + roaring::api::roaring64_bitmap_t *bitmap = roaring::api::roaring64_bitmap_create(); + roaring::api::roaring64_bitmap_add(bitmap, 3); + roaring::api::roaring64_bitmap_add(bitmap, 5); + roaring::api::roaring64_bitmap_add(bitmap, 7); + + ASSERT_EQ(roaring64_bitmap_contains(bitmap, 5), true); + ASSERT_EQ(roaring64_bitmap_contains(bitmap, 20), false); + + ASSERT_EQ(roaring64_bitmap_minimum(bitmap), 3); + ASSERT_EQ(roaring64_bitmap_maximum(bitmap), 7); + + roaring::api::roaring64_bitmap_t *flip_bitmap = + roaring64_bitmap_flip_closed(bitmap, 3, 7); + + ASSERT_EQ(roaring64_bitmap_contains(flip_bitmap, 5), false); + ASSERT_EQ(roaring64_bitmap_contains(flip_bitmap, 7), false); + ASSERT_EQ(roaring64_bitmap_contains(flip_bitmap, 4), true); + + + std::cout << roaring64_bitmap_contains(bitmap, 5) << std::endl; +} + + +TEST_F(TestVectorIndexAdaptor, result) +{ + int dim = 10; + void* index = nullptr; + std::mt19937 rng; + rng.seed(47); + //ASSERT_EQ(obvectorlib::init(), true); + std::uniform_real_distribution<> distrib_real; + ASSERT_EQ(obvectorlib::create_index(dim, dim, 200, 100, index), 0); + + float a[110] = {0.203846,0.205289,0.880265,0.824340,0.615737,0.496899,0.983632,0.865571,0.248373,0.542833, + 0.735541,0.670776,0.903237,0.447223,0.232028,0.659316,0.765661,0.226980,0.579658,0.933939, + 0.327936,0.048756,0.084670,0.389642,0.970982,0.370915,0.181664,0.940780,0.013905,0.628127, + 0.148869,0.878546,0.028024,0.326642,0.044912,0.144034,0.717580,0.442633,0.637534,0.633993, + 0.334970,0.857377,0.886132,0.668270,0.983913,0.418145,0.208459,0.190118,0.959676,0.796483, + 0.117582,0.302352,0.471198,0.248725,0.315868,0.717533,0.028496,0.710370,0.007130,0.710913, + 0.551185,0.231134,0.075354,0.230557,0.248149,0.383390,0.483179,0.238120,0.289662,0.970101, + 
0.185221,0.315131,0.558301,0.543172,0.335010,0.556101,0.595842,0.168794,0.567442,0.062338, + 0.928764,0.254038,0.272721,0.648755,0.966464,0.200054,0.093298,0.901419,0.676738,0.122339, + 0.345999,0.254102,0.950869,0.275233,0.844568,0.215723,0.302821,0.563644,0.811224,0.175574, + 0.615558,0.613338,0.031494,0.114999,0.713017,0.792606,0.551865,0.990780,0.034867,0.062117}; + + int64_t vids[11] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + + ASSERT_EQ(obvectorlib::add_index(dim, a, vids, 11, index), 0); + + int k = 3; + int64_t result_size = 0; + const float* res_dist = new float[k]; + const int64_t* res_ids = new int64_t[k]; + std::map myMap; + std::shared_ptr> myMapPtr = std::make_shared>(myMap); + + float search[10] = {0.712338,0.603321,0.133444,0.428146,0.876387,0.763293,0.408760,0.765300,0.560072,0.900498}; + + ASSERT_EQ(obvectorlib::knn_search1(dim, index, search, k, res_dist, res_ids, result_size, myMapPtr), 0); + + std::cout << "vid: " << res_ids[0] << " " << "dis: " << res_dist[0] << std::endl; + +} +*/ + +/* +TEST_F(TestVectorIndexAdaptor, vsag_index) +{ + int dim = 16; + void* index = nullptr; + std::mt19937 rng; + rng.seed(47); + //ASSERT_EQ(obvectorlib::init(), true); + std::uniform_real_distribution<> distrib_real; + ASSERT_EQ(obvectorlib::create_index(dim, dim, 200, 100, index), 0); + std::cout << "index: " << index << " " << (index == nullptr) << std::endl; + ASSERT_EQ(index == nullptr, 0); + + int num_vectors = 10; + float *dis = new float[num_vectors * dim]; + for (int cnt = 0; cnt < 10; cnt++) { + for (int64_t i = 0; i < num_vectors * dim; ++i) { + dis[i] = distrib_real(rng); + } + ASSERT_EQ(obvectorlib::add_index(dim, dis, num_vectors, index), 0); + std::cout << "add_index " << cnt << " finish." 
<< std::endl; + } + + std::cout << "add_index finish" << std::endl; + + float *search = new float[dim]; + for (int64_t i = 0; i < dim; ++i) { + search[i] = distrib_real(rng); + } + int k = 10; + std::cout << "search info finish" << std::endl; + const float* res_dist = new float[k]; + const int64_t* res_ids = new int64_t[k]; + int64_t result_size = 0; + + ASSERT_EQ(obvectorlib::knn_search(dim, index, search, k, res_dist, res_ids, result_size), 0); + + std::cout << "knn_search" << std::endl; + for (int i = 0; i < result_size; i++) { + std::cout << "vid: " << res_ids[i] << " " << "dis: " << res_dist[i] << std::endl; + } + + roaring::api::roaring64_bitmap_t *bitmap = roaring::api::roaring64_bitmap_create(); + + uint64_t *vid = new uint64_t[20]; + for (int i = 0; i < 20; i++) { + vid[i] = i + 1; + } + std::cout << "build roaring bitmap" << std::endl; + roaring::api::roaring64_bitmap_add_many(bitmap, 20, vid); + std::map myMap; + + std::cout << "get roaring bitmap iter" << std::endl; + uint64_t *buf = new uint64_t[20]; + roaring::api::roaring64_iterator_t *roaring_iter = roaring64_iterator_create(bitmap); + uint64_t ele_cnt = roaring64_iterator_read(roaring_iter, buf, 20); + + std::cout << "get roaring bitmap buf" << std::endl; + for (int i = 0; i < ele_cnt; i++) { + std::cout << buf[i] << " "; + myMap[buf[i]] = false; + } + std::shared_ptr> myMapPtr = std::make_shared>(myMap); + std::cout << std::endl << "before knn search 1" << std::endl; + + const float* res_dist1 = new float[k]; + const int64_t* res_ids1 = new int64_t[k]; + int64_t result_size1 = 0; + ASSERT_EQ(obvectorlib::knn_search1(dim, index, search, k, res_dist1, res_ids1, result_size1, myMapPtr), 0); + + std::cout << "knn_search1 result" << std::endl; + for (int i = 0; i < result_size; i++) { + std::cout << "vid: " << res_ids1[i] << " " << "dis: " << res_dist1[i] << std::endl; + } + +} + +using namespace oceanbase::share; +TEST_F(TestVectorIndexAdaptor, adapt) +{ + ObArenaAllocator allocator(ObModIds::TEST); + 
ObVectorIndexHNSWParam param; + param.type_ = ObVectorIndexAlgorithmType::VIAT_HNSW; + param.lib_ = ObVectorIndexAlgorithmLib::VIAL_VSAG; + param.dim_ = 10; + param.m_ = 10; + param.ef_construction_ = 200; + param.ef_search_ = 200; + + // serialize param + int64_t param_len = param.get_serialize_size(); + char *param_buf = static_cast(allocator.alloc(sizeof(char) * param_len)); + int64_t pos = 0; + ASSERT_EQ(param.serialize(param_buf, param_len, pos), 0); + + // init adaptor + ObPluginVectorIndexAdaptor adaptor(&allocator); + adaptor.init_incr_tablet(); + ASSERT_EQ(adaptor.init(ObString(param_len, param_buf)), 0); + + // adaptor param + int64_t dim = 0; + ASSERT_EQ(adaptor.get_dim(dim), 0); + std::cout << "dim: " << dim << std::endl; + + // insert_rows + ObStoreRow *rows = nullptr; + int64_t row_cnt = 100; + rows = static_cast(allocator.alloc(sizeof(ObStoreRow) * row_cnt)); + ASSERT_EQ(rows == nullptr, 0); + std::cout << "create rows" << std::endl; + + std::mt19937 rng; + rng.seed(88); + std::uniform_real_distribution<> distrib_real; + float a = distrib_real(rng); + std::cout << "new float" << std::endl; + uint64_t vid = 0; + + for (int i = 0; i < row_cnt; i++) { + float *vector = static_cast(allocator.alloc(sizeof(float) * dim)); + ASSERT_EQ(vector == nullptr, 0); + for (int j = 0; j < dim; j++) { + vector[j] = distrib_real(rng); + std::cout << vector[j] << " "; + } + std::cout << std::endl; + char *vector_void = reinterpret_cast(vector); + int64_t vector_str_len = 40; + + ObStoreRow *row = nullptr; + ASSERT_EQ(OB_SUCCESS, malloc_store_row(allocator, 3, row, FLAT_ROW_STORE)); + row->row_val_.cells_[0].set_uint64(vid++); + row->row_val_.cells_[2].set_string(ObVarcharType, vector_void, vector_str_len); + rows[i] = *row; + } + std::cout << "build row finish." 
<< std::endl; + + ASSERT_EQ(adaptor.insert_rows(rows, row_cnt), 0); + + float *search = new float[dim]; + std::map myMap; + int k = 5; + for (int64_t i = 0; i < dim; ++i) { + search[i] = distrib_real(rng); + } + + const float* res_dist1 = new float[k]; + const int64_t* res_ids1 = new int64_t[k]; + int64_t result_size1 = 0; + std::shared_ptr> myMapPtr; + uint64_t insert_min = roaring64_bitmap_minimum(adaptor.get_incr_ibitmap()); + uint64_t insert_max = roaring64_bitmap_maximum(adaptor.get_incr_ibitmap()); + roaring::api::roaring64_bitmap_t *insert_flip_bitmap = + roaring64_bitmap_flip_closed(adaptor.get_incr_ibitmap(), insert_min, insert_max); + ASSERT_EQ(ObPluginVectorIndexAdaptor::cast_roaringbitmap_to_stdmap(insert_flip_bitmap, myMapPtr), 0); + ASSERT_EQ(obvectorlib::knn_search1(dim, adaptor.get_incr_index(), search, k, res_dist1, res_ids1, result_size1, myMapPtr), 0); + + std::cout << "knn_search1 result" << std::endl; + for (int i = 0; i < result_size1; i++) { + std::cout << "vid: " << res_ids1[i] << " " << "dis: " << res_dist1[i] << std::endl; + } + +} +*/ + +using namespace oceanbase::share; + + +TEST_F(TestVectorIndexAdaptor, vsag_add_duplicate) +{ + obvectorlib::VectorIndexPtr index_handler = nullptr; + std::mt19937 rng; + rng.seed(47); + int dim = 128; + int max_degree = 16; + int ef_search = 200; + int ef_construction = 100; + const char* const METRIC_L2 = "l2"; + const char* const DATATYPE_FLOAT32 = "float32"; + + ASSERT_EQ(obvectorutil::create_index(index_handler, + obvectorlib::HNSW_TYPE, + DATATYPE_FLOAT32, + METRIC_L2, + dim, + max_degree, + ef_construction, + ef_search), 0); + + int num_vectors = 1000; + + int64_t *ids = new int64_t[num_vectors]; + float *vecs = new float[dim * num_vectors]; + + std::uniform_real_distribution<> distrib_real; + for (int64_t i = 0; i < num_vectors; ++i) { + ids[i] = i; + } + + for (int64_t i = 0; i < num_vectors * dim; ++i) { + vecs[i] = distrib_real(rng); + } + + for (int64_t i = 0; i < 10; ++i) { + ASSERT_EQ(0, 
obvectorutil::add_index(index_handler, vecs + i * 100, ids + i * 100, dim, 100)); + } + + std::cout << "add duplicate vector" << std::endl; + + ASSERT_EQ(0, obvectorutil::add_index(index_handler, vecs, ids, dim, 100)); + + int64_t index_size = 0; + ASSERT_EQ(0, obvectorutil::get_index_number(index_handler, index_size)); + + std::cout << "vecotr num: " << index_size << std::endl; + +} + +TEST_F(TestVectorIndexAdaptor, vsag_build_index) +{ + //ASSERT_EQ(obvectorutil::example(), 0); + + obvectorlib::VectorIndexPtr index_handler = nullptr; + std::mt19937 rng; + rng.seed(47); + int dim = 128; + int max_degree = 16; + int ef_search = 200; + int ef_construction = 100; + const char* const METRIC_L2 = "l2"; + const char* const DATATYPE_FLOAT32 = "float32"; + + ASSERT_EQ(obvectorutil::create_index(index_handler, + obvectorlib::HNSW_TYPE, + DATATYPE_FLOAT32, + METRIC_L2, + dim, + max_degree, + ef_construction, + ef_search), 0); + + int num_vectors = 10000; + + int64_t *ids = new int64_t[num_vectors]; + float *vecs = new float[dim * num_vectors]; + + std::uniform_real_distribution<> distrib_real; + for (int64_t i = 0; i < num_vectors; ++i) { + ids[i] = i; + } + + for (int64_t i = 0; i < num_vectors * dim; ++i) { + vecs[i] = distrib_real(rng); + } + + ASSERT_EQ(0, obvectorutil::build_index(index_handler, vecs, ids, dim, num_vectors)); + + int64_t index_size = 0; + ASSERT_EQ(0, obvectorutil::get_index_number(index_handler, index_size)); + ASSERT_EQ(index_size, num_vectors); + + const float* result_dist0; + const int64_t* result_ids0; + const float* result_dist1; + const int64_t* result_ids1; + int64_t expect_cnt = 10; + int64_t result_size = 0; + roaring::api::roaring64_bitmap_t *bitmap = roaring::api::roaring64_bitmap_create(); + + std::cout << "===================== Query Vec ================" << std::endl; + float *query_vecs = new float[dim]; + for (int64_t i = 0; i < dim; ++i) { + query_vecs[i] = distrib_real(rng); + if (i == 0) { + std::cout << "[" << vecs[i] << ", "; + } 
else if (i == dim - 1) { + std::cout << vecs[i] << "]" << std::endl; + } else { + std::cout << vecs[i] << ", "; + } + } + + std::cout << "===================== Query Result ================" << std::endl; + + ASSERT_EQ(0, obvectorutil::knn_search(index_handler, + query_vecs, + dim, + expect_cnt, + result_dist0, + result_ids0, + result_size, + ef_search, + bitmap)); + for (int64_t i = 0; i < result_size; ++i) { + std::cout << i << " id: " << result_ids0[i] << " dis: " << result_dist0[i] << std::endl; + for (int64_t j = 0; j < dim; ++j) { + if (j == 0) { + std::cout << "[" << vecs[i * dim + j] << ", "; + } else if (j == dim - 1) { + std::cout << vecs[i * dim + j] << "]" << std::endl; + } else { + std::cout << vecs[i * dim + j] << ", "; + } + } + } + + std::cout << "===================== Query StdMap ================" << std::endl; + + roaring64_bitmap_add(bitmap, 7055); + roaring64_bitmap_add(bitmap, 2030); + ASSERT_EQ(0, obvectorutil::knn_search(index_handler, + query_vecs, + dim, + expect_cnt, + result_dist1, + result_ids1, + result_size, + ef_search, + bitmap)); + + for (int64_t i = 0; i < result_size; ++i) { + std::cout << i << " id: " << result_ids1[i] << " dis: " << result_dist1[i] << std::endl; + for (int64_t j = 0; j < dim; ++j) { + if (j == 0) { + std::cout << "[" << vecs[i * dim + j] << ", "; + } else if (j == dim - 1) { + std::cout << vecs[i * dim + j] << "]" << std::endl; + } else { + std::cout << vecs[i * dim + j] << ", "; + } + } + } +} + +TEST_F(TestVectorIndexAdaptor, vsag_add_index) +{ + //ASSERT_EQ(obvectorutil::example(), 0); + + obvectorlib::VectorIndexPtr index_handler = nullptr; + std::mt19937 rng; + rng.seed(50); + int dim = 128; + int max_degree = 16; + int ef_search = 200; + int ef_construction = 100; + const char* const METRIC_L2 = "l2"; + const char* const DATATYPE_FLOAT32 = "float32"; + + ASSERT_EQ(obvectorutil::create_index(index_handler, + obvectorlib::HNSW_TYPE, + DATATYPE_FLOAT32, + METRIC_L2, + dim, + max_degree, + ef_construction, + 
ef_search), 0); + + int num_vectors = 10000; + + int64_t *ids = new int64_t[num_vectors]; + float *vecs = new float[dim * num_vectors]; + + std::uniform_real_distribution<> distrib_real; + for (int64_t i = 0; i < num_vectors; ++i) { + ids[i] = i; + } + + for (int64_t i = 0; i < num_vectors * dim; ++i) { + vecs[i] = distrib_real(rng); + } + + for (int64_t i = 0; i < 100; ++i) { + ASSERT_EQ(0, obvectorutil::add_index(index_handler, vecs + i * 100, ids + i * 100, dim, 100)); + } + //ASSERT_EQ(0, obvectorutil::add_index(index_handler, vecs, ids, dim, num_vectors)); + + int64_t index_size = 0; + ASSERT_EQ(0, obvectorutil::get_index_number(index_handler, index_size)); + ASSERT_EQ(index_size, num_vectors); + + const float* result_dist0; + const int64_t* result_ids0; + const float* result_dist1; + const int64_t* result_ids1; + int64_t expect_cnt = 10; + int64_t result_size = 0; + roaring::api::roaring64_bitmap_t *bitmap = roaring::api::roaring64_bitmap_create(); + + std::cout << "===================== Query Vec ================" << std::endl; + float *query_vecs = new float[dim]; + for (int64_t i = 0; i < dim; ++i) { + query_vecs[i] = distrib_real(rng); + if (i == 0) { + std::cout << "[" << vecs[i] << ", "; + } else if (i == dim - 1) { + std::cout << vecs[i] << "]" << std::endl; + } else { + std::cout << vecs[i] << ", "; + } + } + + std::cout << "===================== Query Result ================" << std::endl; + + ASSERT_EQ(0, obvectorutil::knn_search(index_handler, + query_vecs, + dim, + expect_cnt, + result_dist0, + result_ids0, + result_size, + ef_search, + bitmap)); + for (int64_t i = 0; i < result_size; ++i) { + std::cout << i << " id: " << result_ids0[i] << " dis: " << result_dist0[i] << std::endl; + for (int64_t j = 0; j < dim; ++j) { + if (j == 0) { + std::cout << "[" << vecs[i * dim + j] << ", "; + } else if (j == dim - 1) { + std::cout << vecs[i * dim + j] << "]" << std::endl; + } else { + std::cout << vecs[i * dim + j] << ", "; + } + } + } + + std::cout << 
"===================== Query StdMap ================" << std::endl; + + roaring64_bitmap_add(bitmap, 545); + roaring64_bitmap_add(bitmap, 3720); + ASSERT_EQ(0, obvectorutil::knn_search(index_handler, + query_vecs, + dim, + expect_cnt, + result_dist1, + result_ids1, + result_size, + ef_search, + bitmap)); + + for (int64_t i = 0; i < result_size; ++i) { + std::cout << i << " id: " << result_ids1[i] << " dis: " << result_dist1[i] << std::endl; + for (int64_t j = 0; j < dim; ++j) { + if (j == 0) { + std::cout << "[" << vecs[i * dim + j] << ", "; + } else if (j == dim - 1) { + std::cout << vecs[i * dim + j] << "]" << std::endl; + } else { + std::cout << vecs[i * dim + j] << ", "; + } + } + } +} + +TEST_F(TestVectorIndexAdaptor, test_insert) +{ + //ASSERT_EQ(obvectorutil::example(), 0); + + obvectorlib::VectorIndexPtr index_handler = nullptr; + std::mt19937 rng; + rng.seed(50); + int dim = 3; + int max_degree = 16; + int ef_search = 200; + int ef_construction = 100; + const char* const METRIC_L2 = "l2"; + const char* const DATATYPE_FLOAT32 = "float32"; + + ASSERT_EQ(obvectorutil::create_index(index_handler, + obvectorlib::HNSW_TYPE, + DATATYPE_FLOAT32, + METRIC_L2, + dim, + max_degree, + ef_construction, + ef_search), 0); + + int num_vectors = 40; + + int64_t *ids = new int64_t[num_vectors]; + float *vecs = new float[dim * num_vectors]; + + std::uniform_real_distribution<> distrib_real; + for (int64_t i = 0; i < num_vectors; ++i) { + ids[i] = i; + } + + for (int64_t i = 0; i < num_vectors * dim; ++i) { + vecs[i] = distrib_real(rng); + } + + // for (int64_t i = 0; i < 100; ++i) { + // ASSERT_EQ(0, obvectorutil::add_index(index_handler, vecs + i * 100, ids + i * 100, dim, 100)); + // } + ASSERT_EQ(0, obvectorutil::add_index(index_handler, vecs, ids, dim, num_vectors)); + + int64_t index_size = 0; + ASSERT_EQ(0, obvectorutil::get_index_number(index_handler, index_size)); + ASSERT_EQ(index_size, num_vectors); + + const float* result_dist0; + const int64_t* result_ids0; + 
const float* result_dist1; + const int64_t* result_ids1; + int64_t expect_cnt = 10; + int64_t result_size = 0; + roaring::api::roaring64_bitmap_t *bitmap = roaring::api::roaring64_bitmap_create(); + + std::cout << "===================== Query Vec ================" << std::endl; + float *query_vecs = new float[dim]; + for (int64_t i = 0; i < dim; ++i) { + query_vecs[i] = distrib_real(rng); + if (i == 0) { + std::cout << "[" << vecs[i] << ", "; + } else if (i == dim - 1) { + std::cout << vecs[i] << "]" << std::endl; + } else { + std::cout << vecs[i] << ", "; + } + } + + std::cout << "===================== Query Result ================" << std::endl; + + ASSERT_EQ(0, obvectorutil::knn_search(index_handler, + query_vecs, + dim, + expect_cnt, + result_dist0, + result_ids0, + result_size, + ef_search, + bitmap)); + for (int64_t i = 0; i < result_size; ++i) { + std::cout << i << " id: " << result_ids0[i] << " dis: " << result_dist0[i] << std::endl; + for (int64_t j = 0; j < dim; ++j) { + if (j == 0) { + std::cout << "[" << vecs[i * dim + j] << ", "; + } else if (j == dim - 1) { + std::cout << vecs[i * dim + j] << "]" << std::endl; + } else { + std::cout << vecs[i * dim + j] << ", "; + } + } + } + + std::cout << "===================== Query StdMap ================" << std::endl; + + roaring64_bitmap_add(bitmap, 3720); + roaring64_bitmap_add(bitmap, 545); + ASSERT_EQ(0, obvectorutil::knn_search(index_handler, + query_vecs, + dim, + expect_cnt, + result_dist1, + result_ids1, + result_size, + ef_search, + bitmap)); + + for (int64_t i = 0; i < result_size; ++i) { + std::cout << i << " id: " << result_ids1[i] << " dis: " << result_dist1[i] << std::endl; + for (int64_t j = 0; j < dim; ++j) { + if (j == 0) { + std::cout << "[" << vecs[i * dim + j] << ", "; + } else if (j == dim - 1) { + std::cout << vecs[i * dim + j] << "]" << std::endl; + } else { + std::cout << vecs[i * dim + j] << ", "; + } + } + } +} + +class ObTestHNSWSerializeCallback { +public: + struct CbParam : public 
ObOStreamBuf::CbParam { + CbParam() + : allocator_(nullptr), data_(nullptr), size_(0) + {} + virtual ~CbParam() {} + bool is_valid() const + { + return nullptr != allocator_; + } + ObIAllocator *allocator_; + void *data_; + int64_t size_; + }; +public: + ObTestHNSWSerializeCallback() + {} + int operator()(const char *data, const int64_t data_size, share::ObOStreamBuf::CbParam &cb_param) + { + int ret = OB_SUCCESS; + ObTestHNSWSerializeCallback::CbParam ¶m = static_cast(cb_param); + char *buf = (char*)param.allocator_->alloc(data_size + param.size_); + if (OB_ISNULL(buf)) { + ret = OB_ERR_UNEXPECTED; + } else { + MEMCPY(buf, param.data_, param.size_); + MEMCPY(buf + param.size_, data, data_size); + param.data_ = buf; + param.size_ += data_size; + } + return ret; + } +private: +}; + +class ObTestHNSWDeserializeCallback { +public: + struct CbParam : public ObIStreamBuf::CbParam { + CbParam() + : allocator_(nullptr), data_(nullptr), size_(0), cur_pos_(0), part_size_(0) + {} + virtual ~CbParam() {} + bool is_valid() const + { + return nullptr != data_ + && nullptr != allocator_; + } + ObIAllocator *allocator_; + void *data_; + int64_t size_; + int64_t cur_pos_; + int64_t part_size_; + }; +public: + ObTestHNSWDeserializeCallback() + {} + int operator()(char *&data, const int64_t data_size, int64_t &read_size, share::ObIStreamBuf::CbParam &cb_param) + { + int ret = OB_SUCCESS; + ObTestHNSWDeserializeCallback::CbParam ¶m = static_cast(cb_param); + if (param.cur_pos_ <= param.size_) { + read_size = (param.size_ - param.cur_pos_) > param.part_size_ ? 
param.part_size_ : (param.size_ - param.cur_pos_); + data = ((char*)param.data_) + param.cur_pos_; + param.cur_pos_ += read_size; + } else { + ret = OB_ITER_END; + } + LOG_INFO("[Vsag] get des data", K(ret), K(data), K(read_size), K(param.size_), K(param.cur_pos_), K(param.part_size_)); + return ret; + } +private: +}; + +TEST_F(TestVectorIndexAdaptor, test_ser_deser) +{ + void* raw_memory = (void*)malloc(sizeof(common::obvectorutil::ObVsagLogger)); + common::obvectorutil::ObVsagLogger* ob_logger = new (raw_memory)common::obvectorutil::ObVsagLogger(); + obvectorlib::set_logger(ob_logger); + obvectorlib::VectorIndexPtr index_handler = nullptr; + std::mt19937 rng; + rng.seed(50); + int dim = 3; + int max_degree = 16; + int ef_search = 200; + int ef_construction = 100; + const char* const METRIC_L2 = "l2"; + const char* const DATATYPE_FLOAT32 = "float32"; + ASSERT_EQ(obvectorutil::create_index(index_handler, + obvectorlib::HNSW_TYPE, + DATATYPE_FLOAT32, + METRIC_L2, + dim, + max_degree, + ef_construction, + ef_search), 0); + int num_vectors = 40; + + int64_t *ids = new int64_t[num_vectors]; + float *vecs = new float[dim * num_vectors]; + + std::uniform_real_distribution<> distrib_real; + for (int64_t i = 0; i < num_vectors; ++i) { + ids[i] = i; + } + + for (int64_t i = 0; i < num_vectors * dim; ++i) { + vecs[i] = distrib_real(rng); + } + + // for (int64_t i = 0; i < 100; ++i) { + // ASSERT_EQ(0, obvectorutil::add_index(index_handler, vecs + i * 100, ids + i * 100, dim, 100)); + // } + ASSERT_EQ(0, obvectorutil::add_index(index_handler, vecs, ids, dim, num_vectors)); + + int64_t index_size = 0; + ASSERT_EQ(0, obvectorutil::get_index_number(index_handler, index_size)); + ASSERT_EQ(index_size, num_vectors); + + // do serialize + ObArenaAllocator allocator; + ObVectorIndexSerializer index_seri(allocator); + ObTestHNSWSerializeCallback ser_callback; + ObOStreamBuf::Callback ser_cb = ser_callback; + + ObTestHNSWSerializeCallback::CbParam ser_param; + ser_param.allocator_ = 
&allocator; + ASSERT_EQ(0, index_seri.serialize(index_handler, ser_param, ser_cb)); + + // do deserialize + obvectorlib::VectorIndexPtr des_index_handler = nullptr; + ObTestHNSWDeserializeCallback des_callback; + ObIStreamBuf::Callback des_cb = des_callback; + + ObTestHNSWDeserializeCallback::CbParam des_param; + des_param.allocator_ = &allocator; + des_param.data_ = ser_param.data_; + des_param.size_ = ser_param.size_; + des_param.cur_pos_ = 0; + des_param.part_size_ = 10; + ASSERT_EQ(obvectorutil::create_index(des_index_handler, + obvectorlib::HNSW_TYPE, + DATATYPE_FLOAT32, + METRIC_L2, + dim, + max_degree, + ef_construction, + ef_search), 0); + ASSERT_EQ(0, index_seri.deserialize(des_index_handler, des_param, des_cb)); + // check vector count + ASSERT_EQ(0, obvectorutil::get_index_number(des_index_handler, index_size)); + ASSERT_EQ(index_size, num_vectors); +} + +class VecIndexAlloctor : public ObIAllocator +{ +public: + VecIndexAlloctor(); + ~VecIndexAlloctor(); + + virtual void *alloc(const int64_t sz); + void *alloc(const int64_t size, const ObMemAttr &attr); + virtual void *realloc(void *ptr, const int64_t oldsz, const int64_t newsz); + virtual void free(void *ptr); + int64_t used(); + int64_t total(); + int64_t global_total(); + constexpr static int64_t MEM_PTR_HEAD_SIZE = sizeof(int64_t); + +private: + static std::atomic global_total_; + ObMemAttr attr_; + std::atomic used_; + int64_t total_; +}; + +class ObVsagAllocator : public vsag::Allocator +{ + void* Allocate(size_t size) override { + return malloc(size); + } + + void Deallocate(void* p) override { + return free(p); + } + + void* Reallocate(void* p, size_t size) override { + return realloc(p, size); + } + +}; + +std::atomicVecIndexAlloctor::global_total_(0); + +void *VecIndexAlloctor::alloc(const int64_t size) +{ + int64_t actual_size = MEM_PTR_HEAD_SIZE + size; + global_total_ += actual_size; + used_ += actual_size; + + void *ptr = nullptr; + ptr = ob_malloc(actual_size, attr_); + *(int64_t*)ptr = 
actual_size; + return (char*)ptr + MEM_PTR_HEAD_SIZE; +} + +void VecIndexAlloctor::free(void *ptr) +{ + void *size_ptr = (char*)ptr - sizeof(uint64_t); + int64_t size = *(int64_t *)size_ptr; + global_total_ -= size; + used_ -= size; + + ob_free((char*)ptr - MEM_PTR_HEAD_SIZE); +} + + +void *VecIndexAlloctor::realloc(void *ptr, const int64_t oldsz, const int64_t newsz) +{ + void *new_ptr = nullptr; + if (newsz < oldsz) { + return new_ptr; + } else { + int64_t actual_size = MEM_PTR_HEAD_SIZE + newsz; + global_total_ += newsz - oldsz + MEM_PTR_HEAD_SIZE; + used_ += newsz - oldsz + MEM_PTR_HEAD_SIZE; + + new_ptr = ob_realloc((char*)ptr - MEM_PTR_HEAD_SIZE, actual_size, attr_); + *(char*)new_ptr = actual_size; + } + + return (char*)new_ptr + MEM_PTR_HEAD_SIZE; +} + +class ExampleAllocator : public vsag::Allocator { +public: + std::string + Name() override { + return "myallocator"; + } + + void* Allocate(size_t size) override { + return malloc(size); + } + + void Deallocate(void* p) override { + return free(p); + } + + void* Reallocate(void* p, size_t size) override { + return realloc(p, size); + } +}; + +class VsagMemContext : public vsag::Allocator +{ +public: + VsagMemContext() {}; + ~VsagMemContext() { DESTROY_CONTEXT(mem_context_); } + int init(); + + std::string Name() override { + return "ObVsagAlloc"; + } + void* Allocate(size_t size) override { + return mem_context_->get_malloc_allocator().alloc(size); + } + + void Deallocate(void* p) override { + return mem_context_->get_malloc_allocator().free(p); + } + + void* Reallocate(void* p, size_t size) override { + void *new_ptr = nullptr; + if (size == 0) { + if (OB_NOT_NULL(p)) { + mem_context_->get_malloc_allocator().free(p); + } + } else { + new_ptr = mem_context_->get_malloc_allocator().alloc(size); + if (OB_ISNULL(new_ptr) || OB_ISNULL(p)) { + } else { + MEMCPY(new_ptr, p, size); + mem_context_->get_malloc_allocator().free(p); + } + } + return new_ptr; + } + + int64_t total() { + return 
mem_context_->malloc_used(); + } + +private: + lib::MemoryContext mem_context_; + +}; + +int VsagMemContext::init() +{ + INIT_SUCC(ret); + lib::ContextParam param; + param.set_mem_attr(MTL_ID()) + .set_properties(lib::ALLOC_THREAD_SAFE | lib::RETURN_MALLOC_DEFAULT) + .set_page_size(OB_MALLOC_MIDDLE_BLOCK_SIZE) + .set_ablock_size(lib::INTACT_MIDDLE_AOBJECT_SIZE); + if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(mem_context_, param))) { + //LOG_WARN("create memory entity failed", K(ret)); + } else { + + } + return ret; +} + +TEST_F(TestVectorIndexAdaptor, mem_ctx_child) +{ + lib::ContextParam parent_param; + lib::MemoryContext parent_mem_context; + parent_param.set_mem_attr(MTL_ID()) + .set_properties(lib::ADD_CHILD_THREAD_SAFE | lib::ALLOC_THREAD_SAFE | lib::RETURN_MALLOC_DEFAULT) + .set_page_size(OB_MALLOC_MIDDLE_BLOCK_SIZE) + .set_ablock_size(lib::INTACT_MIDDLE_AOBJECT_SIZE); + ASSERT_EQ(ROOT_CONTEXT->CREATE_CONTEXT(parent_mem_context, parent_param), 0); + + lib::ContextParam param; + lib::MemoryContext mem_context; + param.set_page_size(OB_MALLOC_MIDDLE_BLOCK_SIZE); + ASSERT_EQ(parent_mem_context->CREATE_CONTEXT(mem_context, param), 0); + + int64_t *vid = nullptr; + vid = static_cast(mem_context->get_malloc_allocator().alloc(sizeof(int64_t) * 1000)); + ASSERT_EQ(vid != NULL, 1); + + std::cout << "Alloc MemoryContext: " << mem_context->used() << std::endl; + std::cout << "Alloc MemoryContext: " << mem_context->hold() << std::endl; + std::cout << "Alloc MemoryContext: " << parent_mem_context->used() << std::endl; + std::cout << "Alloc MemoryContext: " << parent_mem_context->hold() << std::endl; +} + +TEST_F(TestVectorIndexAdaptor, mem_ctx) +{ + VsagMemContext vsag_mem_context; + ASSERT_EQ(vsag_mem_context.init(), 0); + std::cout << "INIT MemoryContext: " << vsag_mem_context.total() << std::endl; + int64_t *vid = nullptr; + vid = static_cast(vsag_mem_context.Allocate(sizeof(int64_t) * 1000)); + std::cout << "Alloc MemoryContext: " << vsag_mem_context.total() << 
std::endl; + vsag_mem_context.Deallocate(vid); + vid = nullptr; + std::cout << "Free MemoryContext: " << vsag_mem_context.total() << std::endl; + + vid = static_cast(vsag_mem_context.Allocate(sizeof(int64_t) * 1000)); + std::cout << "Alloc MemoryContext: " << vsag_mem_context.total() << std::endl; + + vid = static_cast(vsag_mem_context.Reallocate(vid, sizeof(int64_t) * 2000)); + std::cout << "Realloc MemoryContext: " << vsag_mem_context.total() << std::endl; + + vsag_mem_context.Deallocate(vid); + vid = nullptr; + std::cout << "Free MemoryContext: " << vsag_mem_context.total() << std::endl; + +} + +#if 0 +TEST_F(TestVectorIndexAdaptor, vsag_alloc) +{ + VsagMemContext vsag_mem_context; + ASSERT_EQ(vsag_mem_context.init(), 0); + obvectorlib::VectorIndexPtr index_handler = nullptr; + std::mt19937 rng; + rng.seed(50); + int dim = 128; + int max_degree = 16; + int ef_search = 200; + int ef_construction = 100; + const char* const METRIC_L2 = "l2"; + const char* const DATATYPE_FLOAT32 = "float32"; + std::cout << "MemoryUsed0: " << vsag_mem_context.total() << std::endl; + ASSERT_EQ(obvectorutil::create_index(index_handler, + obvectorlib::HNSW_TYPE, + DATATYPE_FLOAT32, + METRIC_L2, + dim, + max_degree, + ef_construction, + ef_search, + &vsag_mem_context), 0); + std::cout << "MemoryUsed1: " << vsag_mem_context.total() << std::endl; + int num_vectors = 100; + + int64_t *ids = new int64_t[num_vectors]; + float *vecs = new float[dim * num_vectors]; + + std::uniform_real_distribution<> distrib_real; + for (int64_t i = 0; i < num_vectors; ++i) { + ids[i] = i; + } + + for (int64_t i = 0; i < num_vectors * dim; ++i) { + vecs[i] = distrib_real(rng); + } + + ASSERT_EQ(0, obvectorutil::add_index(index_handler, vecs, ids, dim, num_vectors)); + std::cout << "MemoryUsed2: " << vsag_mem_context.total() << std::endl; + + int64_t index_size = 0; + ASSERT_EQ(0, obvectorutil::get_index_number(index_handler, index_size)); + ASSERT_EQ(index_size, num_vectors); + + const float* result_dist0; + 
const int64_t* result_ids0; + const float* result_dist1; + const int64_t* result_ids1; + int64_t expect_cnt = 10; + int64_t result_size = 0; + roaring::api::roaring64_bitmap_t *bitmap = roaring::api::roaring64_bitmap_create(); + + float *query_vecs = new float[dim]; + for (int64_t i = 0; i < dim; ++i) { + query_vecs[i] = distrib_real(rng); + if (i == 0) { + std::cout << "[" << vecs[i] << ", "; + } else if (i == dim - 1) { + std::cout << vecs[i] << "]" << std::endl; + } else { + std::cout << vecs[i] << ", "; + } + } + + ASSERT_EQ(0, obvectorutil::knn_search(index_handler, + query_vecs, + dim, + expect_cnt, + result_dist0, + result_ids0, + result_size, + ef_search, + bitmap)); + for (int64_t i = 0; i < result_size; ++i) { + std::cout << i << " id: " << result_ids0[i] << " dis: " << result_dist0[i] << std::endl; + // for (int64_t j = 0; j < dim; ++j) { + // if (j == 0) { + // std::cout << "[" << vecs[i * dim + j] << ", "; + // } else if (j == dim - 1) { + // std::cout << vecs[i * dim + j] << "]" << std::endl; + // } else { + // std::cout << vecs[i * dim + j] << ", "; + // } + // } + } + std::cout << "MemoryUsed3: " << vsag_mem_context.total() << std::endl; + + obvectorutil::delete_index(index_handler); + std::cout << "MemoryUsed: " << vsag_mem_context.total() << std::endl; +} +#endif + +}; + +int main(int argc, char** argv) +{ + system("rm -f test_vector_index_adaptor.log*"); + system("rm -fr run_*"); + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_vector_index_adaptor.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/src/logservice/data_dictionary/ob_data_dict_struct.h b/src/logservice/data_dictionary/ob_data_dict_struct.h index b0f315e96b..27f6355cca 100644 --- a/src/logservice/data_dictionary/ob_data_dict_struct.h +++ b/src/logservice/data_dictionary/ob_data_dict_struct.h @@ -310,6 +310,8 @@ public: || 
meta_type_.is_xml_sql_type()); } + OB_INLINE bool is_collection() const { return meta_type_.is_collection_sql_type(); } + NEED_SERIALIZE_AND_DESERIALIZE_DICT; TO_STRING_KV( K_(column_id), diff --git a/src/logservice/libobcdc/src/ob_log_formatter.cpp b/src/logservice/libobcdc/src/ob_log_formatter.cpp index eeaaa1f796..054aeb1d87 100644 --- a/src/logservice/libobcdc/src/ob_log_formatter.cpp +++ b/src/logservice/libobcdc/src/ob_log_formatter.cpp @@ -1322,7 +1322,7 @@ int ObLogFormatter::fill_normal_cols_( ret = OB_ERR_UNEXPECTED; LOG_ERROR("not support ext info log type", KR(ret), K(is_new_value), KPC(lob_data_get_ctx), KPC(cv)); } - } else if (cv->is_json() || cv->is_geometry() || cv->is_roaringbitmap()) { + } else if (cv->is_json() || cv->is_geometry() || cv->is_roaringbitmap() || cv->is_collection()) { const common::ObObjType obj_type = cv->get_obj_type(); cv->value_.set_string(obj_type, *new_col_str); @@ -1375,7 +1375,7 @@ int ObLogFormatter::fill_normal_cols_( ret = OB_ERR_UNEXPECTED; LOG_ERROR("not support ext info log type", KR(ret), K(is_new_value), KPC(lob_data_get_ctx), KPC(cv)); } - } else if (cv->is_json() || cv->is_geometry() || cv->is_roaringbitmap()) { + } else if (cv->is_json() || cv->is_geometry() || cv->is_roaringbitmap() || cv->is_collection()) { const common::ObObjType obj_type = cv->get_obj_type(); cv->value_.set_string(obj_type, *old_col_str); diff --git a/src/logservice/libobcdc/src/ob_log_meta_manager.cpp b/src/logservice/libobcdc/src/ob_log_meta_manager.cpp index e1403d9f4b..9edfa7b935 100644 --- a/src/logservice/libobcdc/src/ob_log_meta_manager.cpp +++ b/src/logservice/libobcdc/src/ob_log_meta_manager.cpp @@ -1180,6 +1180,12 @@ int ObLogMetaManager::set_column_meta_( mysql_type = obmysql::MYSQL_TYPE_ORA_XML; } else if (ObRoaringBitmapType == col_type) { mysql_type = obmysql::MYSQL_TYPE_ROARINGBITMAP; + } else if (ObCollectionSQLType == col_type) { + // get extended_type_info from column schema and determine it is array or vector + const 
ObIArray &extended_type_info = column_schema.get_extended_type_info(); + if (OB_FAIL(ObArrayUtil::get_mysql_type(extended_type_info, mysql_type))) { + LOG_ERROR("get_mysql_type fail", KR(ret)); + } } col_meta->setScale(column_schema.get_data_scale()); diff --git a/src/logservice/libobcdc/src/ob_log_part_trans_task.cpp b/src/logservice/libobcdc/src/ob_log_part_trans_task.cpp index 5eb657b6c1..39f21c920e 100644 --- a/src/logservice/libobcdc/src/ob_log_part_trans_task.cpp +++ b/src/logservice/libobcdc/src/ob_log_part_trans_task.cpp @@ -417,6 +417,11 @@ int MutatorRow::add_column_( "old_obj_len", value->get_string_len(), "new_obj_ptr", (void *)cv_node->value_.get_string_ptr(), "new_obj_len", cv_node->value_.get_string_len()); + } else if (value->is_collection_sql_type() && value->get_string_len() > 2 * _M_) { // Array may exceed 2M + OBLOG_FORMATTER_LOG(DEBUG, "column_cast: ", "old_obj_ptr", (void *)value->get_string_ptr(), + "old_obj_len", value->get_string_len(), + "new_obj_ptr", (void *)cv_node->value_.get_string_ptr(), + "new_obj_len", cv_node->value_.get_string_len()); } else { OBLOG_FORMATTER_LOG(DEBUG, "column_cast: ", "old_obj", *value, "new_obj", cv_node->value_); diff --git a/src/logservice/libobcdc/src/ob_log_part_trans_task.h b/src/logservice/libobcdc/src/ob_log_part_trans_task.h index 90cc71f940..6c387c8ddd 100644 --- a/src/logservice/libobcdc/src/ob_log_part_trans_task.h +++ b/src/logservice/libobcdc/src/ob_log_part_trans_task.h @@ -193,6 +193,7 @@ struct ColValue bool is_json() const { return value_.is_json(); } bool is_geometry() const { return value_.is_geometry(); } bool is_roaringbitmap() const { return value_.is_roaringbitmap(); } + bool is_collection() const { return value_.is_collection_sql_type(); } common::ObObjType get_obj_type() const { return value_.get_type(); } int add_child(ColValue *child) {return children_.add(child);} diff --git a/src/logservice/libobcdc/src/ob_log_schema_cache_info.cpp 
b/src/logservice/libobcdc/src/ob_log_schema_cache_info.cpp index 30c82b8921..8059930306 100644 --- a/src/logservice/libobcdc/src/ob_log_schema_cache_info.cpp +++ b/src/logservice/libobcdc/src/ob_log_schema_cache_info.cpp @@ -213,8 +213,9 @@ int ColumnSchemaInfo::init_extended_type_info_( common::ObIAllocator &allocator) { int ret = OB_SUCCESS; + const bool is_extended_type = column_table_schema.is_enum_or_set() || column_table_schema.is_collection(); - if (! column_table_schema.is_enum_or_set()) { + if (! is_extended_type) { // do nothing } else { // Only enum or set types are cached diff --git a/src/logservice/libobcdc/src/ob_log_utils.cpp b/src/logservice/libobcdc/src/ob_log_utils.cpp index 724fd97eed..3f3cd093d4 100644 --- a/src/logservice/libobcdc/src/ob_log_utils.cpp +++ b/src/logservice/libobcdc/src/ob_log_utils.cpp @@ -491,6 +491,14 @@ const char *get_ctype_string(int ctype) sc_type = "MYSQL_TYPE_ROARINGBITMAP"; break; + case oceanbase::obmysql::MYSQL_TYPE_OB_VECTOR: + sc_type = "MYSQL_TYPE_OB_VECTOR"; + break; + + case oceanbase::obmysql::MYSQL_TYPE_OB_ARRAY: + sc_type = "MYSQL_TYPE_OB_ARRAY"; + break; + case oceanbase::obmysql::MYSQL_TYPE_NEWDECIMAL: sc_type = "MYSQL_TYPE_NEWDECIMAL"; break; @@ -630,6 +638,12 @@ bool is_roaringbitmap_type(const int ctype) return (ctype == oceanbase::obmysql::MYSQL_TYPE_ROARINGBITMAP); } +bool is_collection_type(const int ctype) +{ + return (ctype == oceanbase::obmysql::MYSQL_TYPE_OB_ARRAY + || ctype == oceanbase::obmysql::MYSQL_TYPE_OB_VECTOR); +} + double get_delay_sec(const int64_t tstamp_ns) { int64_t delta = (ObTimeUtility::current_time() - tstamp_ns / NS_CONVERSION); diff --git a/src/logservice/libobcdc/src/ob_log_utils.h b/src/logservice/libobcdc/src/ob_log_utils.h index 3861498e6e..54fed901e4 100644 --- a/src/logservice/libobcdc/src/ob_log_utils.h +++ b/src/logservice/libobcdc/src/ob_log_utils.h @@ -251,6 +251,7 @@ bool is_json_type(const int ctype); bool is_geometry_type(const int ctype); bool is_xml_type(const int 
ctype); bool is_roaringbitmap_type(const int ctype); +bool is_collection_type(const int ctype); int64_t get_non_hidden_column_count(const oceanbase::share::schema::ObTableSchema &table_schema); double get_delay_sec(const int64_t tstamp); diff --git a/src/logservice/libobcdc/src/ob_obj2str_helper.cpp b/src/logservice/libobcdc/src/ob_obj2str_helper.cpp index 6df0c22246..b6d0dab8da 100644 --- a/src/logservice/libobcdc/src/ob_obj2str_helper.cpp +++ b/src/logservice/libobcdc/src/ob_obj2str_helper.cpp @@ -23,6 +23,7 @@ #include "lib/geo/ob_geo_utils.h" #include "lib/roaringbitmap/ob_rb_utils.h" #include "lib/xml/ob_xml_util.h" +#include "lib/udt/ob_array_type.h" #include "sql/engine/expr/ob_expr_uuid.h" #include "sql/engine/expr/ob_expr_operator.h" #include "sql/engine/expr/ob_expr_res_type_map.h" @@ -115,7 +116,7 @@ void ObObj2strHelper::destroy() } -//extended_type_info used for enum/set +//extended_type_info used for enum/set and collection type int ObObj2strHelper::obj2str(const uint64_t tenant_id, const uint64_t table_id, const uint64_t column_id, @@ -174,6 +175,11 @@ int ObObj2strHelper::obj2str(const uint64_t tenant_id, OBLOG_LOG(ERROR, "binary_format_convert fail", KR(ret), K(table_id), K(column_id), K(obj), K(obj_type), K(str)); } + } else if (ObCollectionSQLType == obj_type) { + if (OB_FAIL(convert_collection_to_text_(obj, str, extended_type_info, allocator))) { + OBLOG_LOG(ERROR, "convert_collection_to_text_ fail", KR(ret), K(table_id), K(column_id), + K(obj), K(obj_type), K(str)); + } // This should be before is_string_type, because for char/nchar it is also ObStringTC, so is_string_type=true } else if (need_padding_(compat_mode, obj)) { if (OB_FAIL(convert_char_obj_to_padding_obj_(compat_mode, obj, accuracy, collation_type, allocator, str))) { @@ -464,6 +470,16 @@ int ObObj2strHelper::convert_xmltype_to_text_( return ObXmlUtil::xml_bin_to_text(allocator, data, str); } +int ObObj2strHelper::convert_collection_to_text_( + const common::ObObj &obj, + 
common::ObString &str, + const common::ObIArray &extended_type_info, + common::ObIAllocator &allocator) const +{ + const ObString &data = obj.get_string(); + return ObArrayUtil::convert_collection_bin_to_string(data, extended_type_info, allocator, str); +} + bool ObObj2strHelper::need_padding_(const lib::Worker::CompatMode &compat_mode, const common::ObObj &obj) const { diff --git a/src/logservice/libobcdc/src/ob_obj2str_helper.h b/src/logservice/libobcdc/src/ob_obj2str_helper.h index 2f229ed199..a05d0cbac8 100644 --- a/src/logservice/libobcdc/src/ob_obj2str_helper.h +++ b/src/logservice/libobcdc/src/ob_obj2str_helper.h @@ -125,6 +125,12 @@ private: common::ObString &str, common::ObIAllocator &allocator) const; + int convert_collection_to_text_( + const common::ObObj &obj, + common::ObString &str, + const common::ObIArray &extended_type_info, + common::ObIAllocator &allocator) const; + private: bool inited_; IObCDCTimeZoneInfoGetter *timezone_info_getter_; diff --git a/src/logservice/libobcdc/tests/ob_binlog_record_printer.cpp b/src/logservice/libobcdc/tests/ob_binlog_record_printer.cpp index 0be3468329..bec0214e50 100644 --- a/src/logservice/libobcdc/tests/ob_binlog_record_printer.cpp +++ b/src/logservice/libobcdc/tests/ob_binlog_record_printer.cpp @@ -554,10 +554,16 @@ int ObBinlogRecordPrinter::output_data_file_column_data(IBinlogRecord *br, bool is_geometry = is_geometry_type(ctype); bool is_xml = is_xml_type(ctype); bool is_roaringbitmap = is_roaringbitmap_type(ctype); + bool is_collection = is_collection_type(ctype); bool is_diff = (index < new_cols_count) && new_cols[index].m_diff_val; constexpr int64_t string_print_md5_threshold = 4L << 10; - const bool is_type_for_md5_printing = is_lob || is_json || is_geometry || is_xml || is_roaringbitmap || - (is_string && col_data_length >= string_print_md5_threshold); + const bool is_type_for_md5_printing = is_lob + || is_json + || is_geometry + || is_xml + || is_roaringbitmap + || is_collection + || (is_string && 
col_data_length >= string_print_md5_threshold); // TODO 止尘 patch the code // bool is_json_diff = br->isJsonDiffColVal(cname); bool is_json_diff = false; diff --git a/src/observer/CMakeLists.txt b/src/observer/CMakeLists.txt index 7a0424a7c4..25b3afecdd 100644 --- a/src/observer/CMakeLists.txt +++ b/src/observer/CMakeLists.txt @@ -447,6 +447,7 @@ ob_set_subtarget(ob_server virtual_table virtual_table/ob_all_virtual_sys_variable_default_value.cpp virtual_table/ob_all_virtual_tenant_scheduler_running_job.cpp virtual_table/ob_all_virtual_compatibility_control.cpp + virtual_table/ob_all_virtual_vector_index_info.cpp virtual_table/ob_all_virtual_session_ps_info.cpp virtual_table/ob_all_virtual_tmp_file.cpp ) diff --git a/src/observer/mysql/obsm_utils.cpp b/src/observer/mysql/obsm_utils.cpp index 916c4ef229..2bbc4734de 100644 --- a/src/observer/mysql/obsm_utils.cpp +++ b/src/observer/mysql/obsm_utils.cpp @@ -378,8 +378,7 @@ int ObSMUtils::cell_str( } break; } - case ObUserDefinedSQLTC: - case ObCollectionSQLTC: { + case ObUserDefinedSQLTC: { if (obj.get_udt_subschema_id() == 0) { // xml ret = ObMySQLUtil::sql_utd_cell_str(MTL_ID(), buf, len, obj.get_string(), pos); } else if (type == MYSQL_PROTOCOL_TYPE::TEXT) { // common sql udt text protocal @@ -391,6 +390,10 @@ int ObSMUtils::cell_str( } break; } + case ObCollectionSQLTC: { + ret = ObMySQLUtil::varchar_cell_str(buf, len, obj.get_string(), is_oracle_raw, pos); + break; + } case ObDecimalIntTC: { ret = ObMySQLUtil::decimalint_cell_str(buf, len, obj.get_decimal_int(), obj.get_int_bytes(), obj.get_scale(), pos, zerofill, zflength); diff --git a/src/observer/ob_rpc_processor_simple.cpp b/src/observer/ob_rpc_processor_simple.cpp index 5e21338034..e88b6fa8c2 100644 --- a/src/observer/ob_rpc_processor_simple.cpp +++ b/src/observer/ob_rpc_processor_simple.cpp @@ -559,6 +559,20 @@ int ObRpcCheckandCancelDDLComplementDagP::process() return ret; } +int ObRpcCheckandCancelDeleteLobMetaRowDagP::process() +{ + int ret = OB_SUCCESS; + 
if (OB_ISNULL(gctx_.ob_service_)) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", K(ret), K(gctx_.ob_service_)); + } else { + bool is_dag_exist = true; + ret = gctx_.ob_service_->check_and_cancel_delete_lob_meta_row_dag(arg_, is_dag_exist); + result_ = is_dag_exist; + } + return ret; +} + int ObRpcFetchSysLSP::process() { int ret = OB_SUCCESS; diff --git a/src/observer/ob_rpc_processor_simple.h b/src/observer/ob_rpc_processor_simple.h index d0ef8a3a23..637c24bdc2 100644 --- a/src/observer/ob_rpc_processor_simple.h +++ b/src/observer/ob_rpc_processor_simple.h @@ -282,6 +282,7 @@ OB_DEFINE_PROCESSOR_OBADMIN(Srv, OB_FORCE_DUMP_SERVER_USAGE, ObForceDumpServerUs OB_DEFINE_PROCESSOR_S(Srv, OB_REFRESH_SERVICE_NAME, ObRefreshServiceNameP); OB_DEFINE_PROCESSOR_S(Srv, OB_CAL_UNIT_PHY_RESOURCE, ObResourceLimitCalculatorP); OB_DEFINE_PROCESSOR_S(Srv, OB_CHECK_AND_CANCEL_DDL_COMPLEMENT_DAG, ObRpcCheckandCancelDDLComplementDagP); +OB_DEFINE_PROCESSOR_S(Srv, OB_CHECK_AND_CANCEL_DELETE_LOB_META_ROW_DAG, ObRpcCheckandCancelDeleteLobMetaRowDagP); OB_DEFINE_PROCESSOR_S(Srv, OB_KILL_CLIENT_SESSION, ObKillClientSessionP); OB_DEFINE_PROCESSOR_S(Srv, OB_CLIENT_SESSION_CONNECT_TIME, ObClientSessionConnectTimeP); diff --git a/src/observer/ob_server.cpp b/src/observer/ob_server.cpp index 7a851ad495..05d12d8dc3 100644 --- a/src/observer/ob_server.cpp +++ b/src/observer/ob_server.cpp @@ -128,6 +128,7 @@ #endif #include "lib/xml/ob_libxml2_sax_handler.h" #include "ob_check_params.h" +#include "share/vector_index/ob_plugin_vector_index_utils.h" #include "lib/roaringbitmap/ob_rb_memory_mgr.h" #ifdef OB_BUILD_AUDIT_SECURITY #include "sql/audit/ob_audit_log_mgr.h" @@ -2727,6 +2728,12 @@ int ObServer::init_sql() } } + if (OB_SUCC(ret)) { + if (OB_FAIL(ObPluginVectorIndexUtils::set_vsag_logger())) { + LOG_ERROR("failed to initialize VSAG LOGGER.", K(ret)); + } + } + if (OB_SUCC(ret)) { LOG_INFO("init sql done"); } else { diff --git a/src/observer/ob_service.cpp 
b/src/observer/ob_service.cpp index 29dbe8457f..97295e2a10 100644 --- a/src/observer/ob_service.cpp +++ b/src/observer/ob_service.cpp @@ -55,6 +55,7 @@ #include "ob_server_event_history_table_operator.h" #include "share/ob_alive_server_tracer.h" #include "storage/ddl/ob_complement_data_task.h" // complement data for drop column +#include "storage/ddl/ob_delete_lob_meta_row_task.h" // delete lob meta row for drop vec index #include "storage/ddl/ob_ddl_merge_task.h" #include "storage/ddl/ob_build_index_task.h" #include "storage/tablet/ob_tablet_multi_source_data.h" @@ -2632,6 +2633,39 @@ int ObService::build_ddl_single_replica_request(const ObDDLBuildSingleReplicaReq ret = OB_SIZE_OVERFLOW == saved_ret ? OB_EAGAIN : ret; } LOG_INFO("obs get rpc to build drop column dag", K(ret)); + } else if (ObDDLType(arg.ddl_type_) == ObDDLType::DDL_DROP_VEC_INDEX) { + ObTenantDagScheduler *dag_scheduler = nullptr; + ObDeleteLobMetaRowDag *dag = nullptr; + if (OB_ISNULL(dag_scheduler = MTL(ObTenantDagScheduler *))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("dag scheduler is null", K(ret)); + } else if (OB_FAIL(dag_scheduler->alloc_dag(dag))) { + LOG_WARN("fail to alloc dag", K(ret)); + } else if (OB_ISNULL(dag)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, dag is null", K(ret), KP(dag)); + } else if (OB_FAIL(dag->init(arg))) { + LOG_WARN("fail to init delete drop lob meta row dag", K(ret), K(arg)); + } else if (OB_FAIL(dag->create_first_task())) { + LOG_WARN("create first task failed", K(ret)); + } else if (OB_FAIL(dag_scheduler->add_dag(dag))) { + if (OB_EAGAIN == ret) { + LOG_WARN("delete lob meta row dag already exists, no need to schedule once again", KR(ret)); + ret = OB_SUCCESS; + } else if (OB_SIZE_OVERFLOW == ret) { + LOG_WARN("dag is full", KR(ret)); + ret = OB_EAGAIN; + } else { + LOG_WARN("fail to add dag to queue", KR(ret)); + } + } else { + dag = nullptr; + } + if (OB_NOT_NULL(dag_scheduler) && OB_NOT_NULL(dag)) { + (void) 
dag->handle_init_failed_ret_code(ret); + dag_scheduler->free_dag(*dag); + dag = nullptr; + } } else { ret = OB_NOT_SUPPORTED; LOG_WARN("not supported ddl type", K(ret), K(arg)); @@ -2651,6 +2685,47 @@ int ObService::check_and_cancel_ddl_complement_data_dag(const ObDDLBuildSingleRe } else if (OB_UNLIKELY(!is_complement_data_relying_on_dag(ObDDLType(arg.ddl_type_)))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid ddl type", K(ret), K(arg)); + } else { + ObTenantDagScheduler *dag_scheduler = nullptr; + ObComplementDataDag *dag = nullptr; + if (OB_ISNULL(dag_scheduler = MTL(ObTenantDagScheduler *))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("dag scheduler is null", K(ret)); + } else if (OB_FAIL(dag_scheduler->alloc_dag(dag))) { + LOG_WARN("fail to alloc dag", K(ret)); + } else if (OB_ISNULL(dag)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, dag is null", K(ret), KP(dag)); + } else if (OB_FAIL(dag->init(arg))) { + LOG_WARN("fail to init complement data dag", K(ret), K(arg)); + } else if (OB_FAIL(dag_scheduler->check_dag_exist(dag, is_dag_exist))) { + LOG_WARN("check dag exist failed", K(ret)); + } else if (is_dag_exist && OB_FAIL(dag_scheduler->cancel_dag(dag))) { + // sync to cancel ready dag only, not including running dag. 
+ LOG_WARN("cancel dag failed", KP(dag), K(ret)); + } + if (OB_NOT_NULL(dag)) { + (void) dag->handle_init_failed_ret_code(ret); + dag_scheduler->free_dag(*dag); + dag = nullptr; + } + } + if (REACH_COUNT_INTERVAL(1000L)) { + LOG_INFO("receive cancel ddl complement dag request", K(ret), K(is_dag_exist), K(arg)); + } + return ret; +} + +int ObService::check_and_cancel_delete_lob_meta_row_dag(const obrpc::ObDDLBuildSingleReplicaRequestArg &arg, bool &is_dag_exist) +{ + int ret = OB_SUCCESS; + is_dag_exist = true; + if (OB_UNLIKELY(!arg.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(arg)); + } else if (OB_UNLIKELY(!is_delete_lob_meta_row_relying_on_dag(ObDDLType(arg.ddl_type_)))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid ddl type", K(ret), K(arg)); } else { ObTenantDagScheduler *dag_scheduler = nullptr; ObComplementDataDag *dag = nullptr; diff --git a/src/observer/ob_service.h b/src/observer/ob_service.h index 41fa94f62b..cefcd31765 100644 --- a/src/observer/ob_service.h +++ b/src/observer/ob_service.h @@ -126,6 +126,7 @@ public: int calc_column_checksum_request(const obrpc::ObCalcColumnChecksumRequestArg &arg, obrpc::ObCalcColumnChecksumRequestRes &res); int build_ddl_single_replica_request(const obrpc::ObDDLBuildSingleReplicaRequestArg &arg, obrpc::ObDDLBuildSingleReplicaRequestResult &res); int check_and_cancel_ddl_complement_data_dag(const obrpc::ObDDLBuildSingleReplicaRequestArg &arg, bool &is_dag_exist); + int check_and_cancel_delete_lob_meta_row_dag(const obrpc::ObDDLBuildSingleReplicaRequestArg &arg, bool &is_dag_exist); int stop_partition_write(const obrpc::Int64 &switchover_timestamp, obrpc::Int64 &result); int check_partition_log(const obrpc::Int64 &switchover_timestamp, obrpc::Int64 &result); int get_wrs_info(const obrpc::ObGetWRSArg &arg, obrpc::ObGetWRSResult &result); diff --git a/src/observer/ob_srv_xlator_rootserver.cpp b/src/observer/ob_srv_xlator_rootserver.cpp index e21e510e8f..fde3711605 100644 --- 
a/src/observer/ob_srv_xlator_rootserver.cpp +++ b/src/observer/ob_srv_xlator_rootserver.cpp @@ -111,7 +111,7 @@ void oceanbase::observer::init_srv_xlator_for_rootserver(ObSrvRpcXlator *xlator) RPC_PROCESSOR(rootserver::ObRpcRenameTableP, *gctx_.root_service_); RPC_PROCESSOR(rootserver::ObRpcTruncateTableP, *gctx_.root_service_); RPC_PROCESSOR(rootserver::ObRpcTruncateTableV2P, *gctx_.root_service_); - RPC_PROCESSOR(rootserver::ObRpcGenerateAuxIndexSchemaP, *gctx_.root_service_); + RPC_PROCESSOR(rootserver::ObRpcCreateAuxIndexP, *gctx_.root_service_); RPC_PROCESSOR(rootserver::ObRpcCreateIndexP, *gctx_.root_service_); RPC_PROCESSOR(rootserver::ObRpcDropIndexP, *gctx_.root_service_); RPC_PROCESSOR(rootserver::ObRpcCreateMLogP, *gctx_.root_service_); diff --git a/src/observer/omt/ob_multi_tenant.cpp b/src/observer/omt/ob_multi_tenant.cpp index 794bc90f38..4bbf5e0d73 100644 --- a/src/observer/omt/ob_multi_tenant.cpp +++ b/src/observer/omt/ob_multi_tenant.cpp @@ -165,6 +165,7 @@ #include "rootserver/mview/ob_mview_maintenance_service.h" #include "share/resource_limit_calculator/ob_resource_limit_calculator.h" #include "storage/checkpoint/ob_checkpoint_diagnose.h" +#include "share/vector_index/ob_plugin_vector_index_service.h" #include "lib/roaringbitmap/ob_rb_memory_mgr.h" #include "storage/tmp_file/ob_tmp_file_manager.h" // ObTenantTmpFileManager #include "storage/restore/ob_tenant_restore_info_mgr.h" @@ -602,6 +603,7 @@ int ObMultiTenant::init(ObAddr myaddr, MTL_BIND2(mtl_new_default, ObAuditLogger::mtl_init, ObAuditLogger::mtl_start, ObAuditLogger::mtl_stop, ObAuditLogger::mtl_wait, mtl_destroy_default); MTL_BIND2(mtl_new_default, ObAuditLogUpdater::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); #endif + MTL_BIND2(mtl_new_default, ObPluginVectorIndexService::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); } if (OB_SUCC(ret)) { diff --git 
a/src/observer/table/ttl/ob_tenant_tablet_ttl_mgr.cpp b/src/observer/table/ttl/ob_tenant_tablet_ttl_mgr.cpp index 0755752647..42c63b7cec 100644 --- a/src/observer/table/ttl/ob_tenant_tablet_ttl_mgr.cpp +++ b/src/observer/table/ttl/ob_tenant_tablet_ttl_mgr.cpp @@ -45,6 +45,8 @@ int ObTenantTabletTTLMgr::init(ObLS *ls) LOG_WARN("fail to get ls", KR(ret)); } else if (OB_FAIL(init(MTL_ID()))) { LOG_WARN("fail to init tenant ttl mgr", KR(ret), K(MTL_ID())); + } else if (OB_FAIL(vector_idx_scheduler_.init(MTL_ID(), ls, tg_id_))) { + LOG_WARN("fail to init vector idx scheduler", KR(ret), K(MTL_ID())); } else { ls_ = ls; sql_proxy_ = GCTX.sql_proxy_; @@ -66,6 +68,8 @@ int ObTenantTabletTTLMgr::init(const uint64_t tenant_id) LOG_WARN("schema service is null", KR(ret)); } else if (OB_FAIL(TG_CREATE_TENANT(lib::TGDefIDs::TenantTabletTTLMgr, tg_id_))) { LOG_WARN("fail to init timer", KR(ret)); + } else if (OB_FAIL(TG_START(tg_id_))) { + LOG_WARN("fail to create ObTenantTabletTTLMgr thread", K(ret), K_(tg_id)); } else if (OB_FAIL(alloc_tenant_info(tenant_id))) { LOG_WARN("fail to alloc tenant info", KR(ret), K(MTL_ID())); } else { @@ -137,11 +141,9 @@ int ObTenantTabletTTLMgr::start() { int ret = OB_SUCCESS; FLOG_INFO("tenant_tablet_ttl_mgr: begin to start", KPC_(ls), K_(tenant_id)); - if (IS_NOT_INIT) { + if (IS_NOT_INIT || tg_id_ == 0) { ret = OB_NOT_INIT; - LOG_WARN("tablet ttl mgr not init", KR(ret)); - } else if (OB_FAIL(TG_START(tg_id_))) { - LOG_WARN("fail to create ObTenantTabletTTLMgr thread", K(ret), K_(tg_id)); + LOG_WARN("tablet ttl mgr not init", KR(ret), K(tg_id_)); } else if (OB_FAIL(TG_SCHEDULE(tg_id_, periodic_task_, periodic_delay_, true))) { LOG_WARN("fail to schedule periodic task", KR(ret), K_(tg_id)); } else { @@ -163,6 +165,7 @@ void ObTenantTabletTTLMgr::stop() // 2) acquire ObTenantTabletTTLMgr's lock_ TG_STOP(tg_id_); is_timer_start_ = false; + vector_idx_scheduler_.stop(); common::ObSpinLockGuard guard(lock_); // set is_leader_ to false to ensure 
after stop, not new TTL dag task will be generate, // i.e., dag_ref won't increase anymore @@ -1328,6 +1331,9 @@ int ObTenantTabletTTLMgr::safe_to_destroy(bool &is_safe) } is_safe = false; } + if (is_safe && OB_SUCC(ret) && OB_FAIL(vector_idx_scheduler_.safe_to_destroy(is_safe))) { + LOG_WARN("fail to check vector index scheduler safe to destroy", KR(ret), K(is_safe)); + } return ret; } diff --git a/src/observer/table/ttl/ob_tenant_tablet_ttl_mgr.h b/src/observer/table/ttl/ob_tenant_tablet_ttl_mgr.h index fb2955a984..37a93abe9d 100644 --- a/src/observer/table/ttl/ob_tenant_tablet_ttl_mgr.h +++ b/src/observer/table/ttl/ob_tenant_tablet_ttl_mgr.h @@ -17,6 +17,7 @@ #include "share/table/ob_ttl_util.h" #include "share/table/ob_table_ttl_common.h" #include "share/tablet/ob_tablet_info.h" +#include "share/vector_index/ob_plugin_vector_index_scheduler.h" namespace oceanbase { @@ -160,6 +161,8 @@ public: int safe_to_destroy(bool &is_safe); int sync_all_dirty_task(common::ObIArray& dirty_tasks); void run_task(); + share::ObPluginVectorIndexLoadScheduler &get_vector_idx_scheduler() { return vector_idx_scheduler_; } + private: typedef common::hash::ObHashMap TabletTaskMap; typedef TabletTaskMap::iterator tablet_task_iter; @@ -274,6 +277,7 @@ private: bool is_timer_start_; int64_t periodic_delay_; OBTTLTimerPeriodicTask periodic_task_; + share::ObPluginVectorIndexLoadScheduler vector_idx_scheduler_; common::ObSpinLock lock_; storage::ObLS *ls_; int tg_id_; diff --git a/src/observer/virtual_table/ob_all_virtual_vector_index_info.cpp b/src/observer/virtual_table/ob_all_virtual_vector_index_info.cpp new file mode 100644 index 0000000000..617352f268 --- /dev/null +++ b/src/observer/virtual_table/ob_all_virtual_vector_index_info.cpp @@ -0,0 +1,243 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include "ob_all_virtual_vector_index_info.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "share/vector_index/ob_plugin_vector_index_service.h" + +namespace oceanbase +{ +using namespace storage; +using namespace common; +namespace observer +{ +/* + * ObVectorIndexInfoIterator implement + * */ +int ObVectorIndexInfoIterator::open() +{ + int ret = OB_SUCCESS; + ObPluginVectorIndexService *service = MTL(ObPluginVectorIndexService*); + if (is_opened_) { + ret = OB_INIT_TWICE; + SERVER_LOG(WARN, "The ObVectorIndexInfoIterator has been opened", K(ret)); + } else if (OB_FAIL(service->get_snapshot_ids(complete_tablet_ids_, partial_tablet_ids_))) { + SERVER_LOG(WARN, "failed to get snapshot_ids", K(ret)); + } else if (OB_FAIL(ptr_set_.create(MAX_PTR_SET_VALUES, ObMemAttr(MTL_ID(), "AdaptorSet")))) { + SERVER_LOG(WARN, "failed to create set", K(ret)); + } else { + cur_idx_ = 0; + is_opened_ = true; + } + return ret; +} + +int ObVectorIndexInfoIterator::get_next_info(ObVectorIndexInfo &info) +{ + int ret = OB_SUCCESS; + if (!is_opened_) { + ret = OB_NOT_INIT; + SERVER_LOG(WARN, "not init", K(ret)); + } else if (cur_idx_ >= complete_tablet_ids_.count() + partial_tablet_ids_.count()) { + ret = OB_ITER_END; + } else { + ObLSID ls_id; + ObTabletID tablet_id; + if (cur_idx_ < complete_tablet_ids_.count()) { + ls_id = complete_tablet_ids_.at(cur_idx_).ls_id_; + tablet_id = complete_tablet_ids_.at(cur_idx_).tablet_id_; + } else if (cur_idx_ < complete_tablet_ids_.count() + partial_tablet_ids_.count()) { + ls_id = partial_tablet_ids_.at(cur_idx_ - complete_tablet_ids_.count()).ls_id_; + tablet_id = 
partial_tablet_ids_.at(cur_idx_ - complete_tablet_ids_.count()).tablet_id_; + } + ObPluginVectorIndexAdapterGuard adapter_guard; + if (OB_FAIL(MTL(ObPluginVectorIndexService*)->get_adapter_inst_guard(ls_id, tablet_id, adapter_guard))) { + if (OB_HASH_NOT_EXIST != ret) { + SERVER_LOG(WARN, "failed to get adapter inst guard", K(ls_id), K(tablet_id), KR(ret)); + } + } else if (OB_HASH_EXIST == (ret = ptr_set_.exist_refactored(reinterpret_cast(adapter_guard.get_adatper())))) { + ret = OB_HASH_NOT_EXIST; // set OB_HASH_NOT_EXIST to ignore this adapter + } else if (OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + if (OB_FAIL(ptr_set_.set_refactored(reinterpret_cast(adapter_guard.get_adatper())))) { + SERVER_LOG(WARN, "failed to set adapter check set", K(ret)); + } else if (OB_FAIL(adapter_guard.get_adatper()->fill_vector_index_info(info))) { + SERVER_LOG(WARN, "failed to fill vector index info", K(ret), K(ls_id), K(tablet_id)); + } else { + info.ls_id_ = ls_id.id(); + } + } else { + SERVER_LOG(WARN, "failed to check adapter ptr", K(ret)); + } + cur_idx_++; + } + return ret; +} +/* Puts the iterator back into its unopened state: rewinds cur_idx_, clears both tablet-id arrays, resets the allocator, destroys ptr_set_ and clears is_opened_. */ +void ObVectorIndexInfoIterator::reset() +{ + cur_idx_ = 0; + complete_tablet_ids_.reset(); + partial_tablet_ids_.reset(); + allocator_.reset(); + ptr_set_.destroy(); + is_opened_ = false; +} + +/* + * ObAllVirtualVectorIndexInfo implementation + * */ +ObAllVirtualVectorIndexInfo::ObAllVirtualVectorIndexInfo() + : ObVirtualTableScannerIterator(), + ip_buf_(), + info_(), + iter_() +{ +} + +ObAllVirtualVectorIndexInfo::~ObAllVirtualVectorIndexInfo() +{ + reset(); +} +/* Forwards to execute(row); OB_ITER_END is returned to the caller without logging a warning. */ +int ObAllVirtualVectorIndexInfo::inner_get_next_row(ObNewRow *&row) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(execute(row))) { + if (ret != OB_ITER_END) { + SERVER_LOG(WARN, "execute fail", K(ret)); + } + } + return ret; +} +void ObAllVirtualVectorIndexInfo::release_last_tenant() +{ + iter_.reset(); +} +/* True when tenant_id is not a virtual tenant and either the effective tenant is the sys tenant or tenant_id is the effective tenant itself. */ +bool ObAllVirtualVectorIndexInfo::is_need_process(uint64_t tenant_id) +{ + if (!is_virtual_tenant_id(tenant_id) && +
(is_sys_tenant(effective_tenant_id_) || tenant_id == effective_tenant_id_)) { + return true; + } + return false; +} +/* Produces one row for the current tenant: opens iter_ on first use, fetches the next ObVectorIndexInfo (retrying while get_next_info() returns OB_HASH_NOT_EXIST, the code it uses for entries to skip), then fills every requested output column from info_. On success row points at cur_row_. */ +int ObAllVirtualVectorIndexInfo::process_curr_tenant(ObNewRow *&row) +{ + int ret = OB_SUCCESS; + row = nullptr; + const int64_t col_count = output_column_ids_.count(); + ObObj *cells = cur_row_.cells_; + info_.reset(); + if (!iter_.is_opened() && OB_FAIL(iter_.open())) { + SERVER_LOG(WARN, "failed to open iter", K(ret)); + } else { + do { + if (OB_FAIL(iter_.get_next_info(info_))) { + if (OB_ITER_END != ret && OB_HASH_NOT_EXIST != ret) { + SERVER_LOG(WARN, "get next vector info failed", K(ret)); + } + } + } while (OB_HASH_NOT_EXIST == ret); + } + + for (int64_t i = 0; OB_SUCC(ret) && i < col_count; ++i) { + uint64_t col_id = output_column_ids_.at(i); + switch (col_id) { + case SVR_IP: + if (addr_.ip_to_string(ip_buf_, sizeof(ip_buf_))) { + cells[i].set_varchar(ip_buf_); + cells[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + } else { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "fail to execute ip_to_string", K(ret)); + } + break; + case SVR_PORT: + cells[i].set_int(addr_.get_port()); + break; + case TENANT_ID: + cells[i].set_int(MTL_ID()); + break; + case LS_ID: + // log stream id, stored by the iterator as ls_id.id() + cells[i].set_int(info_.ls_id_); + break; + case ROWKEY_VID_TABLE_ID: + cells[i].set_int(info_.rowkey_vid_table_id_); + break; + case VID_ROWKEY_TABLE_ID: + cells[i].set_int(info_.vid_rowkey_table_id_); + break; + case INC_INDEX_TABLE_ID: + cells[i].set_int(info_.inc_index_table_id_); + break; + case VBITMAP_TABLE_ID: + cells[i].set_int(info_.vbitmap_table_id_); + break; + case SNAPSHOT_INDEX_TABLE_ID: + cells[i].set_int(info_.snapshot_index_table_id_); + break; + case DATA_TABLE_ID: + cells[i].set_int(info_.data_table_id_); + break; + case ROWKEY_VID_TABLET_ID: + cells[i].set_int(info_.rowkey_vid_tablet_id_); + break; + case VID_ROWKEY_TABLET_ID: + cells[i].set_int(info_.vid_rowkey_tablet_id_); + break; + case
INC_INDEX_TABLET_ID: + cells[i].set_int(info_.inc_index_tablet_id_); + break; + case VBITMAP_TABLET_ID: + cells[i].set_int(info_.vbitmap_tablet_id_); + break; + case SNAPSHOT_INDEX_TABLET_ID: + cells[i].set_int(info_.snapshot_index_tablet_id_); + break; + case DATA_TABLET_ID: { + cells[i].set_int(info_.data_tablet_id_); + break; + } + case STATISTICS: + cells[i].set_varchar(info_.statistics_); + cells[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + case SYNC_INFO: + cells[i].set_varchar(info_.sync_info_); + cells[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + default: + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "invalid column id", K(ret), K(col_id)); + } + } + if (OB_SUCC(ret)) { + row = &cur_row_; + } + return ret; +} +/* Resets the multi-tenant operator state, the info iterator, the IP text buffer and the base scanner iterator. */ +void ObAllVirtualVectorIndexInfo::reset() +{ + omt::ObMultiTenantOperator::reset(); + iter_.reset(); + memset(ip_buf_, 0, sizeof(ip_buf_)); + ObVirtualTableScannerIterator::reset(); +} + + +} /* namespace observer */ +} /* namespace oceanbase */ diff --git a/src/observer/virtual_table/ob_all_virtual_vector_index_info.h b/src/observer/virtual_table/ob_all_virtual_vector_index_info.h new file mode 100644 index 0000000000..e0d48c54b2 --- /dev/null +++ b/src/observer/virtual_table/ob_all_virtual_vector_index_info.h @@ -0,0 +1,104 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details.
+ */ + +#ifndef OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_H_ +#define OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_H_ +#include "share/ob_virtual_table_scanner_iterator.h" +#include "share/vector_index/ob_plugin_vector_index_adaptor.h" +#include "observer/omt/ob_multi_tenant_operator.h" +#include "storage/tablet/ob_tablet_iterator.h" +#include "storage/tx_storage/ob_ls_map.h" + +namespace oceanbase +{ +namespace observer +{ +/* Iterator handing out one ObVectorIndexInfo per get_next_info() call. cur_idx_ walks complete_tablet_ids_ first and then partial_tablet_ids_; ptr_set_ remembers adapters already reported so each adapter is filled only once. */ +class ObVectorIndexInfoIterator +{ +public: + +public: + ObVectorIndexInfoIterator() + : allocator_("VecIdxInfo"), + complete_tablet_ids_(), + partial_tablet_ids_(), + cur_idx_(0), + is_opened_(false) + { + } + virtual ~ObVectorIndexInfoIterator() { reset(); } + int open(); + int get_next_info(ObVectorIndexInfo &info); + void reset(); + bool is_opened() const { return is_opened_; } + +private: + static const int64_t MAX_PTR_SET_VALUES = 32; + common::ObArenaAllocator allocator_; + common::ObSEArray complete_tablet_ids_; + common::ObSEArray partial_tablet_ids_; + common::hash::ObHashSet ptr_set_; // adapter addresses already reported; used only for the duplicate check, the stored values must not be used as pointers + int64_t cur_idx_; + bool is_opened_; +}; +/* Virtual-table scanner that emits one row per vector index adapter of each processed tenant; the column ids below select which ObVectorIndexInfo field goes into each cell. */ +class ObAllVirtualVectorIndexInfo : public common::ObVirtualTableScannerIterator, + public omt::ObMultiTenantOperator +{ +public: + enum COLUMN_ID_LIST + { + SVR_IP = common::OB_APP_MIN_COLUMN_ID, + SVR_PORT, + TENANT_ID, + LS_ID, + ROWKEY_VID_TABLE_ID, + VID_ROWKEY_TABLE_ID, + INC_INDEX_TABLE_ID, + VBITMAP_TABLE_ID, + SNAPSHOT_INDEX_TABLE_ID, + DATA_TABLE_ID, + ROWKEY_VID_TABLET_ID, + VID_ROWKEY_TABLET_ID, + INC_INDEX_TABLET_ID, + VBITMAP_TABLET_ID, + SNAPSHOT_INDEX_TABLET_ID, + DATA_TABLET_ID, + STATISTICS, // memory usage, status..., logic_version + SYNC_INFO, // sync snapshot... + }; + ObAllVirtualVectorIndexInfo(); + virtual ~ObAllVirtualVectorIndexInfo(); +public: + inline void set_addr(common::ObAddr &addr) { addr_ = addr; } + virtual int inner_get_next_row(common::ObNewRow *&row); + virtual void reset(); +private: + // whether rows should be returned for the given tenant.
+ virtual bool is_need_process(uint64_t tenant_id) override; + // build the next row for the current tenant. + virtual int process_curr_tenant(common::ObNewRow *&row) override; + // release the resources held for the previous tenant. + virtual void release_last_tenant() override; +private: + common::ObAddr addr_; + char ip_buf_[common::OB_IP_STR_BUFF]; + ObVectorIndexInfo info_; + ObVectorIndexInfoIterator iter_; +private: + DISALLOW_COPY_AND_ASSIGN(ObAllVirtualVectorIndexInfo); +}; + +} /* namespace observer */ +} /* namespace oceanbase */ +#endif diff --git a/src/observer/virtual_table/ob_information_columns_table.cpp b/src/observer/virtual_table/ob_information_columns_table.cpp index 98ab5dbf1c..edb2e7d96a 100644 --- a/src/observer/virtual_table/ob_information_columns_table.cpp +++ b/src/observer/virtual_table/ob_information_columns_table.cpp @@ -628,6 +628,7 @@ int ObInfoSchemaColumnsTable::fill_row_cells(const ObString &database_name, column_type_str_len_, column_schema->get_data_type(), column_schema->get_collation_type(), + column_schema->get_extended_type_info(), column_schema->get_geo_type()))) { SERVER_LOG(WARN,"fail to get data type str",K(ret), K(column_schema->get_data_type())); } else { @@ -1078,10 +1079,18 @@ int ObInfoSchemaColumnsTable::fill_row_cells(const common::ObString &database_na ObString type_str(strlen(column_type_str_), column_type_str_); geo_sub_type = ObGeoTypeUtil::get_geo_type_by_name(type_str); } + ObArray extend_type_info; + if (ob_is_collection_sql_type(column_attributes.result_type_.get_type())) { + ObString type_str(strlen(column_type_str_), column_type_str_); + if (OB_FAIL(extend_type_info.push_back(type_str))) { + SERVER_LOG(WARN, "fail to push to array", K(ret)); + } + } ObObjType column_type = ObMaxType; const ObColumnSchemaV2 *tmp_column_schema = NULL; - if (OB_ISNULL(table_schema_) || - OB_ISNULL(tmp_column_schema = table_schema_->get_column_schema(col_id))) { + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(table_schema_) || + OB_ISNULL(tmp_column_schema =
table_schema_->get_column_schema(col_id))) { ret = OB_ERR_UNEXPECTED; SERVER_LOG(WARN, "table or column schema is null", KR(ret), KP(table_schema_), KP(tmp_column_schema)); } else if (FALSE_IT(column_type = tmp_column_schema->get_meta_type().get_type())) { @@ -1089,6 +1098,7 @@ int ObInfoSchemaColumnsTable::fill_row_cells(const common::ObString &database_na column_type_str_len_, column_attributes.result_type_.get_type(), ObCharset::get_default_collation(ObCharset::get_default_charset()), + extend_type_info, geo_sub_type))) { SERVER_LOG(WARN,"fail to get data type str",K(ret), K(column_attributes.type_)); } else { diff --git a/src/observer/virtual_table/ob_information_parameters_table.cpp b/src/observer/virtual_table/ob_information_parameters_table.cpp index 9e8a4f3ffa..5bf18fa982 100644 --- a/src/observer/virtual_table/ob_information_parameters_table.cpp +++ b/src/observer/virtual_table/ob_information_parameters_table.cpp @@ -105,7 +105,8 @@ int ObInformationParametersTable::fill_row_cells(const ObRoutineInfo *routine_in } else if (OB_FAIL(ob_sql_type_str(data_type_str, OB_MAX_SYS_PARAM_NAME_LENGTH, param_type.get_obj_type(), - param_type.get_collation_type()))) { + param_type.get_collation_type(), + param_info->get_extended_type_info()))) { SERVER_LOG(WARN, "fail to get data type str", K(ret), K(param_type.get_obj_type())); } else { ObString type_val(OB_MAX_SYS_PARAM_NAME_LENGTH, static_cast(strlen(data_type_str)), data_type_str); @@ -184,7 +185,8 @@ int ObInformationParametersTable::fill_row_cells(const ObRoutineInfo *routine_in param_type.get_length(), precision_or_length_semantics, param_type.get_scale(), - param_type.get_collation_type()))) { + param_type.get_collation_type(), + param_info->get_extended_type_info()))) { SERVER_LOG(WARN,"fail to get column type str",K(ret), K(param_type.get_obj_type())); } else { ObString type_val(OB_MAX_SYS_PARAM_NAME_LENGTH, static_cast(strlen(column_type_str)),column_type_str); diff --git 
a/src/observer/virtual_table/ob_mysql_proc_table.cpp b/src/observer/virtual_table/ob_mysql_proc_table.cpp index 209c4b3020..71ef533cb5 100644 --- a/src/observer/virtual_table/ob_mysql_proc_table.cpp +++ b/src/observer/virtual_table/ob_mysql_proc_table.cpp @@ -255,7 +255,8 @@ int ObMySQLProcTable::inner_get_next_row(common::ObNewRow *&row) routine_info->get_ret_type()->get_length(), routine_info->get_ret_type()->get_precision(), routine_info->get_ret_type()->get_scale(), - routine_info->get_ret_type()->get_collation_type()))) { + routine_info->get_ret_type()->get_collation_type(), + *routine_info->get_ret_type_info()))) { SHARE_SCHEMA_LOG(WARN, "fail to get data type str with coll", KPC(routine_info->get_ret_type())); } } else { diff --git a/src/observer/virtual_table/ob_table_columns.cpp b/src/observer/virtual_table/ob_table_columns.cpp index fe0f3b5c8a..efe63da71c 100644 --- a/src/observer/virtual_table/ob_table_columns.cpp +++ b/src/observer/virtual_table/ob_table_columns.cpp @@ -945,6 +945,7 @@ int ObTableColumns::deduce_column_attributes( const ObLengthSemantics default_length_semantics = session->get_local_nls_length_semantics(); int16_t precision_or_length_semantics = result_type.get_precision(); uint64_t sub_type = static_cast(ObGeoType::GEOTYPEMAX); + ObArray extend_type_info; if (is_oracle_mode && ((result_type.is_varchar_or_char() @@ -974,6 +975,19 @@ int ObTableColumns::deduce_column_attributes( sub_type = result_type.get_subschema_id(); } else if ((result_type.get_udt_id() == T_OBJ_XML) || (result_type.get_udt_id() == T_OBJ_SDO_GEOMETRY)) { sub_type = result_type.get_udt_id(); + } else if (result_type.is_collection_sql_type()) { + if (OB_NOT_NULL(session->get_cur_exec_ctx())) { + int tmp_ret = OB_SUCCESS; + const ObSqlCollectionInfo *coll_info = NULL; + uint16_t subschema_id = select_item.expr_->get_result_type().get_subschema_id(); + ObSubSchemaValue value; + if (OB_SUCCESS != (tmp_ret = 
session->get_cur_exec_ctx()->get_sqludt_meta_by_subschema_id(subschema_id, value))) { + LOG_WARN("failed to get subschema ctx", K(tmp_ret)); + } else if (FALSE_IT(coll_info = reinterpret_cast(value.value_))) { + } else if (OB_SUCCESS != (tmp_ret = extend_type_info.push_back(coll_info->get_def_string()))) { + LOG_WARN("failed to push back to array", K(tmp_ret), KPC(coll_info)); + } + } } if (OB_SUCC(ret) && !skip_type_str) { int64_t pos = 0; @@ -985,6 +999,7 @@ int ObTableColumns::deduce_column_attributes( precision_or_length_semantics, result_type.get_scale(), result_type.get_collation_type(), + extend_type_info, sub_type))) { LOG_WARN("fail to get data type str", K(ret)); } else { diff --git a/src/observer/virtual_table/ob_table_index.cpp b/src/observer/virtual_table/ob_table_index.cpp index 985ca24cf2..7413c98cb3 100644 --- a/src/observer/virtual_table/ob_table_index.cpp +++ b/src/observer/virtual_table/ob_table_index.cpp @@ -40,7 +40,8 @@ ObTableIndex::ObTableIndex() is_rowkey_end_(false), is_normal_end_(false), ft_dep_col_idx_(OB_INVALID_ID), - min_data_version_(OB_INVALID_VERSION) + min_data_version_(OB_INVALID_VERSION), + vec_dep_col_idx_(OB_INVALID_ID) { } @@ -87,6 +88,7 @@ void ObTableIndex::reset() simple_index_infos_.reset(); ft_dep_col_idx_ = OB_INVALID_ID; min_data_version_ = OB_INVALID_VERSION; + vec_dep_col_idx_ = OB_INVALID_ID; } int ObTableIndex::init(uint64_t tenant_id) { @@ -629,6 +631,39 @@ int ObTableIndex::add_normal_indexes(const ObTableSchema &table_schema, ret = OB_ERR_UNEXPECTED; SERVER_LOG(WARN, "fail to add normal index column", K(ret), K(col_count), K(ft_dep_col_idx_)); } + } else if (index_schema->is_built_in_vec_index()) { + is_sub_end = true; + } else if (index_schema->is_vec_index()) { + uint64_t vec_vector_id = OB_INVALID_ID; + if (OB_FAIL(index_schema->get_vec_index_column_id(vec_vector_id))) { + LOG_WARN("get generated column id failed", K(ret)); + } else { + ObArray vec_index_key_column_ids; + const ObColumnSchemaV2 
*gen_column_schema = NULL; + if (OB_INVALID_ID == static_cast(vec_dep_col_idx_)) { + vec_dep_col_idx_ = 0; + } + if (OB_UNLIKELY(vec_vector_id <= OB_APP_MIN_COLUMN_ID || OB_INVALID_ID == vec_vector_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid vec column id", K(ret), K(vec_vector_id)); + } else if (OB_ISNULL(gen_column_schema = table_schema.get_column_schema(vec_vector_id))) { + ret = OB_SCHEMA_ERROR; + SERVER_LOG(WARN, "fail to get data table column schema", K(ret)); + } else if (OB_FAIL(gen_column_schema->get_cascaded_column_ids(vec_index_key_column_ids))) { + LOG_WARN("get cascaded column ids from column schema failed", K(ret), K(*gen_column_schema)); + } else if (vec_index_key_column_ids.count() <= vec_dep_col_idx_) { + is_sub_end = true; + vec_dep_col_idx_ = OB_INVALID_ID; + } else if (OB_FAIL(add_vec_index_column(database_name, + table_schema, + index_schema, + cells, + col_count, + vec_index_key_column_ids[vec_dep_col_idx_]))) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "fail to add normal index column", K(ret), K(col_count), K(vec_dep_col_idx_)); + } + } } else { if (OB_FAIL(add_normal_index_column(database_name, table_schema, @@ -1179,6 +1214,193 @@ int ObTableIndex::add_fulltext_index_column(const ObString &database_name, return ret; } +int ObTableIndex::add_vec_index_column(const ObString &database_name, + const ObTableSchema &table_schema, + const ObTableSchema *index_schema, + ObObj *cells, + int64_t col_count, + const uint64_t column_id) +{ /* Fills one output row for column `column_id` of vector index `index_schema`: after validating the arguments, writes one cell per entry of output_column_ids_ into `cells` and, on the path that reaches the cell loop, advances vec_dep_col_idx_ so the next call reports the following key column. */ + int ret = OB_SUCCESS; + if (OB_ISNULL(cells) || OB_ISNULL(index_schema) || OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "parameter or data member is NULL", K(ret), K(cells), K(index_schema), K(allocator_)); + } else if (OB_UNLIKELY(cur_row_.count_ < col_count)) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "cells count is less than output column count", + K(ret), K(cur_row_.count_), K(col_count)); + } else if (OB_UNLIKELY(OB_INVALID_ID == vec_dep_col_idx_ ||
vec_dep_col_idx_ < 0)) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "vec_dep_col_idx_ is wrong", K(ret)); + } else { + const ObColumnSchemaV2 *column_schema = NULL; + ObString index_name; + char *buf = NULL; + int64_t buf_len = number::ObNumber::MAX_PRINTABLE_SIZE; + if (OB_ISNULL(column_schema = table_schema.get_column_schema(column_id))) { + ret = OB_SCHEMA_ERROR; + SERVER_LOG(WARN, "fail to get data table column schema", K(ret), K(column_id)); + } else if (OB_ISNULL(buf = static_cast<char *>(allocator_->alloc(buf_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory for print buffer failed", K(ret), K(buf_len)); + } else { + uint64_t cell_idx = 0; + for (int64_t j = 0; OB_SUCC(ret) && j < col_count; ++j) { + uint64_t col_id = output_column_ids_.at(j); + switch(col_id) { + // table_id + case OB_APP_MIN_COLUMN_ID: { + cells[cell_idx].set_int(table_schema.get_table_id()); + break; + } + // key_name + case OB_APP_MIN_COLUMN_ID + 1: { + index_name.reset(); + // get the original short index name + if (OB_FAIL(ObTableSchema::get_index_name(*allocator_, + table_schema.get_table_id(), index_schema->get_table_name_str(), + index_name))) { + SERVER_LOG(WARN, "error get index table name failed", K(ret)); + } else { + cells[cell_idx].set_varchar(index_name); + cells[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + } + break; + } + // seq_in_index + case OB_APP_MIN_COLUMN_ID + 2: { + cells[cell_idx].set_int(vec_dep_col_idx_ + 1); + break; + } + // table_schema + case OB_APP_MIN_COLUMN_ID + 3: { + cells[cell_idx].set_varchar(database_name); + cells[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + // table + case OB_APP_MIN_COLUMN_ID + 4: { + cells[cell_idx].set_varchar(table_schema.get_table_name_str()); + cells[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + // non_unique + case
OB_APP_MIN_COLUMN_ID + 5: { + cells[cell_idx].set_int(1/*non_unique*/); + break; + } + //index_schema + case OB_APP_MIN_COLUMN_ID + 6: { + cells[cell_idx].set_varchar(database_name); + cells[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + // column_name + case OB_APP_MIN_COLUMN_ID + 7: { + cells[cell_idx].set_varchar(column_schema->get_column_name()); + cells[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + // collation + case OB_APP_MIN_COLUMN_ID + 8: { + cells[cell_idx].set_varchar(ObString("A")); //FIXME are all of them ascending? + cells[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + // cardinality + case OB_APP_MIN_COLUMN_ID + 9: { + //TODO An estimate of the number of unique values in the index. It can be updated by running ANALYZE TABLE or myisamchk -a. + //Cardinality is counted from statistics stored as integers, so the value is not necessarily exact even for small tables. + //The higher the cardinality, the greater the chance that MySQL uses the index when doing joins. + cells[cell_idx].set_null(); + break; + } + // sub_part + case OB_APP_MIN_COLUMN_ID + 10: { + //TODO The number of indexed characters if the column is only partly indexed; NULL if the entire column is indexed. + cells[cell_idx].reset(); //clear the result of the previous row + if (column_schema->is_prefix_column()) { + //print the length of the prefix index + int64_t pos = 0; + if (OB_FAIL(databuff_printf(buf, buf_len, pos, "%d", column_schema->get_data_length()))) { + LOG_WARN("print prefix column data length failed", K(ret), KPC(column_schema), K(buf), K(buf_len), K(pos)); + } else { + cells[cell_idx].set_varchar(ObString(pos, buf)); + cells[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + } + } + break; + } + // packed + case OB_APP_MIN_COLUMN_ID + 11: { + //TODO Indicates how the key is packed. NULL if it is not packed. + cells[cell_idx].set_null(); + break; + } + // null + case OB_APP_MIN_COLUMN_ID + 12: { + if (column_schema->is_rowkey_column()) { + cells[cell_idx].set_varchar(ObString("")); + } else if (column_schema->is_nullable()) { + cells[cell_idx].set_varchar(ObString("YES")); + } else { +
cells[cell_idx].set_varchar(ObString("")); + } + cells[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + // index_type + case OB_APP_MIN_COLUMN_ID + 13: { + cells[cell_idx].set_varchar(ObString("VECTOR")); + cells[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + // comment + case OB_APP_MIN_COLUMN_ID + 14: { + //TODO + cells[cell_idx].set_varchar(ObString(ob_index_status_str(index_schema->get_index_status()))); + cells[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + // index_comment + case OB_APP_MIN_COLUMN_ID + 15: { + cells[cell_idx].set_varchar(index_schema->get_comment_str()); + cells[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + case OB_APP_MIN_COLUMN_ID + 16: { + const ObString &is_visible = index_schema->is_index_visible() ? "YES" : "NO"; + cells[cell_idx].set_varchar(is_visible); + cells[cell_idx].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + //expression + case OB_APP_MIN_COLUMN_ID + 17: { + cells[cell_idx].set_null(); + break; + } + // is_column_visible + case OB_APP_MIN_COLUMN_ID + 18: { + cells[cell_idx].set_int(1); // TODO this value is set for SHOW EXTENDED INDEX + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "invalid column id", K(ret), K(cell_idx), + K(output_column_ids_), K(col_id)); + break; + } + } + if (OB_SUCC(ret)) { + ++cell_idx; + } + } + ++vec_dep_col_idx_; + } + } + return ret; +} + int ObTableIndex::get_show_column_name(const ObTableSchema &table_schema, const ObColumnSchemaV2 &column_schema, ObString &column_name) diff --git a/src/observer/virtual_table/ob_table_index.h b/src/observer/virtual_table/ob_table_index.h index 59705a0859..cb13e98b3c 100644 --- a/src/observer/virtual_table/ob_table_index.h +++
b/src/observer/virtual_table/ob_table_index.h @@ -85,6 +85,12 @@ private: common::ObObj *cells, int64_t col_count, const uint64_t column_id); + int add_vec_index_column(const common::ObString &database_name, + const share::schema::ObTableSchema &table_schema, + const share::schema::ObTableSchema *index_schema, + common::ObObj *cells, + int64_t col_count, + const uint64_t column_id); int get_show_column_name(const share::schema::ObTableSchema &table_schema, const share::schema::ObColumnSchemaV2 &column_schema, common::ObString &column_name); @@ -103,6 +109,7 @@ private: bool is_normal_end_; int64_t ft_dep_col_idx_; uint64_t min_data_version_; + int64_t vec_dep_col_idx_; DISALLOW_COPY_AND_ASSIGN(ObTableIndex); }; } diff --git a/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp b/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp index daf90b5f3a..9d70a3e7cc 100644 --- a/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp +++ b/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp @@ -233,6 +233,7 @@ #include "observer/virtual_table/ob_information_schema_enable_roles_table.h" #include "observer/virtual_table/ob_all_virtual_tenant_scheduler_running_job.h" #include "observer/virtual_table/ob_all_virtual_compatibility_control.h" +#include "observer/virtual_table/ob_all_virtual_vector_index_info.h" #include "observer/virtual_table/ob_all_virtual_tmp_file.h" namespace oceanbase @@ -2792,6 +2793,17 @@ int ObVTIterCreator::create_vt_iter(ObVTableScanParam ¶ms, } break; } + case OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_TID: + { + ObAllVirtualVectorIndexInfo *all_virtual_vector_index_info = NULL; + if (OB_FAIL(NEW_VIRTUAL_TABLE(ObAllVirtualVectorIndexInfo, all_virtual_vector_index_info))) { + SERVER_LOG(ERROR, "ObAllVirtualVectorIndexInfo construct failed", K(ret)); + } else { + all_virtual_vector_index_info->set_addr(addr_); + vt_iter = static_cast(all_virtual_vector_index_info); + } + break; + } END_CREATE_VT_ITER_SWITCH_LAMBDA 
#define AGENT_VIRTUAL_TABLE_CREATE_ITER diff --git a/src/pl/CMakeLists.txt b/src/pl/CMakeLists.txt index 88701e9818..a1be5b1fc2 100644 --- a/src/pl/CMakeLists.txt +++ b/src/pl/CMakeLists.txt @@ -89,6 +89,7 @@ ob_set_subtarget(ob_pl sys_package sys_package/ob_pl_dbms_resource_manager.cpp sys_package/ob_pl_dbms_trusted_certificate_manager.cpp sys_package/ob_dbms_limit_calculator_mysql.cpp + sys_package/ob_dbms_vector_mysql.cpp sys_package/ob_dbms_external_table.cpp ) diff --git a/src/pl/ob_pl_interface_pragma.h b/src/pl/ob_pl_interface_pragma.h index e17c96c19b..694a06ba5f 100644 --- a/src/pl/ob_pl_interface_pragma.h +++ b/src/pl/ob_pl_interface_pragma.h @@ -74,6 +74,7 @@ #include "pl/sys_package/ob_pl_dbms_trusted_certificate_manager.h" #include "pl/sys_package/ob_dbms_limit_calculator_mysql.h" #include "pl/sys_package/ob_dbms_external_table.h" +#include "pl/sys_package/ob_dbms_vector_mysql.h" #ifdef INTERFACE_DEF INTERFACE_DEF(INTERFACE_START, "TEST", (ObPLInterfaceImpl::call)) @@ -770,6 +771,17 @@ INTERFACE_DEF(INTERFACE_DBMS_PROFILER_DROP_OBJECTS, "DBMS_PROFILER_DROP_OBJECTS", (ObDBMSProfiler::drop_objects)) // end of dbms_profiler #endif // OB_BUILD_ORACLE_PL + + // start of dbms_vector_mysql +#define DEFINE_DBMS_VECTOR_MYSQL_INTERFACE(symbol, func) \ + INTERFACE_DEF(INTERFACE_##symbol, #symbol, (func)) + + DEFINE_DBMS_VECTOR_MYSQL_INTERFACE(DBMS_VECTOR_MYSQL_REFRESH_INDEX, ObDBMSVectorMySql::refresh_index) + DEFINE_DBMS_VECTOR_MYSQL_INTERFACE(DBMS_VECTOR_MYSQL_REBUILD_INDEX, ObDBMSVectorMySql::rebuild_index) + +#undef DEFINE_DBMS_VECTOR_MYSQL_INTERFACE + // end of dbms_vector_mysql + /****************************************************************************/ // start of dbms_trusted_certificate_manager diff --git a/src/pl/ob_pl_package_manager.cpp b/src/pl/ob_pl_package_manager.cpp index ee8e7683ee..4a046f4d83 100644 --- a/src/pl/ob_pl_package_manager.cpp +++ b/src/pl/ob_pl_package_manager.cpp @@ -503,7 +503,8 @@ static const ObSysPackageFile 
mysql_syspack_file_list[] = { {"dbms_trusted_certificate_manager", "dbms_trusted_certificate_manager_mysql.sql", "dbms_trusted_certificate_manager_body_mysql.sql"}, {"dbms_ob_limit_calculator", "dbms_ob_limit_calculator_mysql.sql", "dbms_ob_limit_calculator_body_mysql.sql"}, {"dbms_external_table", "dbms_external_table_mysql.sql", "dbms_external_table_body_mysql.sql"}, - {"external_table_alert_log", "external_table_alert_log.sql", nullptr} + {"external_table_alert_log", "external_table_alert_log.sql", nullptr}, + {"dbms_vector", "dbms_vector_mysql.sql", "dbms_vector_body_mysql.sql"} }; // for now! we only have one special system package "__DBMS_UPGRADE" diff --git a/src/pl/sys_package/ob_dbms_vector_mysql.cpp b/src/pl/sys_package/ob_dbms_vector_mysql.cpp new file mode 100644 index 0000000000..0b458b0161 --- /dev/null +++ b/src/pl/sys_package/ob_dbms_vector_mysql.cpp @@ -0,0 +1,110 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX PL + +#include "pl/sys_package/ob_dbms_vector_mysql.h" +#include "storage/vector_index/cmd/ob_vector_refresh_index_executor.h" + +namespace oceanbase +{ +namespace pl +{ +using namespace common; +using namespace sql; +using namespace storage; + +/* +PROCEDURE refresh_index( + IN IDX_NAME VARCHAR(65535), ---- 索引名 + IN TABLE_NAME VARCHAR(65535), ---- 表名 + IN IDX_VECTOR_COL VARCHAR(65535) DEFAULT NULL, ---- 向量列名 + IN REFRESH_THRESHOLD INT DEFAULT 10000, ---- 3号表记录数达到阈值,触发增量刷新 + IN REFRESH_TYPE VARCHAR(65535) DEFAULT NULL ---- 预留: 目前默行为是增量刷新: FAST +); +*/ +int ObDBMSVectorMySql::refresh_index(ObPLExecCtx &ctx, ParamStore ¶ms, ObObj &result) +{ + UNUSED(result); + int ret = OB_SUCCESS; + CK(GET_MIN_CLUSTER_VERSION() >= CLUSTER_VERSION_4_3_2_0); + CK(OB_LIKELY(5 == params.count())); + CK(OB_LIKELY(params.at(0).is_varchar()), + OB_LIKELY(params.at(1).is_varchar()), + OB_LIKELY(params.at(2).is_null() || params.at(2).is_varchar()), + OB_LIKELY(params.at(3).is_int32()), + OB_LIKELY(params.at(4).is_null() || params.at(4).is_varchar())); + if (OB_SUCC(ret)) { + ObVectorRefreshIndexArg refresh_arg; + ObVectorRefreshIndexExecutor refresh_executor; + refresh_arg.idx_name_ = params.at(0).get_varchar(); + refresh_arg.table_name_ = params.at(1).get_varchar(); + params.at(2).is_varchar() ? refresh_arg.idx_vector_col_ = params.at(2).get_varchar() : NULL; + refresh_arg.refresh_threshold_ = params.at(3).get_int(); + params.at(4).is_varchar() ? 
refresh_arg.refresh_type_ = params.at(4).get_varchar() : NULL; + if (OB_FAIL(refresh_executor.execute_refresh(ctx, refresh_arg))) { + LOG_WARN("fail to execute refresh index", KR(ret), K(refresh_arg)); + } + } + return ret; +} + +/* +PROCEDURE rebuild_index ( + IN IDX_NAME VARCHAR(65535), ---- 索引名 + IN TABLE_NAME VARCHAR(65535), ---- 表名 + IN IDX_VECTOR_COL VARCHAR(65535) DEFAULT NULL, ---- 向量列名 + IN DELTA_RATE_THRESHOLD FLOAT DEFAULT 0.2, ---- (3号表记录数+4号表记录数)/基表记录数达到阈值时,触发重建 + IN IDX_ORGANIZATION VARCHAR(65535) DEFAULT NULL, ---- 索引类型,本期不允许rebuild修改索引类型 + IN IDX_DISTANCE_METRICS VARCHAR(65535) DEFAULT 'EUCLIDEAN', ---- 距离类型,本期不允许修改 + IN IDX_PARAMETERS LONGTEXT DEFAULT NULL, ---- 索引参数,本期不允许修改 + IN IDX_PARALLEL_CREATION INT DEFAULT 1 ---- 并行构建索引的并行度,预留,仅语法支持 +); +*/ +int ObDBMSVectorMySql::rebuild_index(ObPLExecCtx &ctx, ParamStore ¶ms, ObObj &result) +{ + UNUSED(result); + int ret = OB_SUCCESS; + CK(GET_MIN_CLUSTER_VERSION() >= CLUSTER_VERSION_4_3_2_0); + CK(OB_LIKELY(8 == params.count())); + CK(OB_LIKELY(params.at(0).is_varchar()), + OB_LIKELY(params.at(1).is_varchar()), + OB_LIKELY(params.at(2).is_null() || params.at(2).is_varchar()), + OB_LIKELY(params.at(3).is_float()), + OB_LIKELY(params.at(4).is_null() || params.at(4).is_varchar()), + OB_LIKELY(params.at(5).is_varchar()), + OB_LIKELY(params.at(6).is_null() || params.at(6).is_text()), + OB_LIKELY(params.at(7).is_int32())); + if (OB_SUCC(ret)) { + ObVectorRebuildIndexArg rebuild_arg; + ObVectorRefreshIndexExecutor rebuild_executor; + rebuild_arg.idx_name_ = params.at(0).get_varchar(); + rebuild_arg.table_name_ = params.at(1).get_varchar(); + params.at(2).is_varchar() ? rebuild_arg.idx_vector_col_ = params.at(2).get_varchar() : NULL; + rebuild_arg.delta_rate_threshold_ = params.at(3).get_float(); + params.at(4).is_varchar() ? 
rebuild_arg.idx_organization_ = params.at(4).get_varchar() : NULL; + rebuild_arg.idx_distance_metrics_ = params.at(5).get_varchar(); + rebuild_arg.idx_parallel_creation_ = params.at(7).get_int(); + + rebuild_arg.idx_parameters_ = NULL; + if (params.at(6).is_text() && OB_FAIL(params.at(6).get_string(rebuild_arg.idx_parameters_))) { + LOG_WARN("fail to get string", K(ret)); + } else if (OB_FAIL(rebuild_executor.execute_rebuild(ctx, rebuild_arg))) { + LOG_WARN("fail to execute refresh index", KR(ret), K(rebuild_arg)); + } + } + return ret; +} + + +} +} \ No newline at end of file diff --git a/src/pl/sys_package/ob_dbms_vector_mysql.h b/src/pl/sys_package/ob_dbms_vector_mysql.h new file mode 100644 index 0000000000..6f17d3305c --- /dev/null +++ b/src/pl/sys_package/ob_dbms_vector_mysql.h @@ -0,0 +1,38 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#pragma once + +#include "pl/ob_pl.h" + +namespace oceanbase +{ +namespace pl +{ + +class ObDBMSVectorMySql +{ +public: + ObDBMSVectorMySql() {} + virtual ~ObDBMSVectorMySql() {} + +#define DECLARE_FUNC(func) \ + static int func(ObPLExecCtx &ctx, sql::ParamStore ¶ms, common::ObObj &result); + + DECLARE_FUNC(refresh_index); + DECLARE_FUNC(rebuild_index); + +#undef DECLARE_FUNC +}; + +} // namespace pl +} // namespace oceanbase \ No newline at end of file diff --git a/src/rootserver/CMakeLists.txt b/src/rootserver/CMakeLists.txt index 46afd7cbf6..448cba5a6e 100644 --- a/src/rootserver/CMakeLists.txt +++ b/src/rootserver/CMakeLists.txt @@ -123,6 +123,7 @@ ob_set_subtarget(ob_rootserver ddl_task ddl_task/ob_ddl_task.cpp ddl_task/ob_drop_index_task.cpp ddl_task/ob_drop_fts_index_task.cpp + ddl_task/ob_drop_vec_index_task.cpp ddl_task/ob_drop_primary_key_task.cpp ddl_task/ob_index_build_task.cpp ddl_task/ob_build_mview_task.cpp @@ -131,6 +132,8 @@ ob_set_subtarget(ob_rootserver ddl_task ddl_task/ob_table_redefinition_task.cpp ddl_task/ob_recover_restore_table_task.cpp ddl_task/ob_ddl_tablet_scheduler.cpp + ddl_task/ob_rebuild_index_task.cpp + ddl_task/ob_vec_index_build_task.cpp ) ob_set_subtarget(ob_rootserver parallel_ddl diff --git a/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp b/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp index ef4070bf4c..746f8b3fda 100644 --- a/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp +++ b/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp @@ -343,186 +343,20 @@ int ObDDLRedefinitionTask::check_table_empty(const ObDDLTaskStatus next_task_sta return ret; } -int ObDDLRedefinitionTask::hold_snapshot(const int64_t snapshot_version) -{ - int ret = OB_SUCCESS; - ObRootService *root_service = GCTX.root_service_; - ObSEArray tablet_ids; - SCN snapshot_scn; - ObSchemaGetterGuard schema_guard; - const ObTableSchema *data_table_schema = nullptr; - const ObTableSchema *dest_table_schema = nullptr; - 
ObMultiVersionSchemaService &schema_service = ObMultiVersionSchemaService::get_instance(); - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObDDLRedefinitionTask has not been inited", K(ret)); - } else if (OB_ISNULL(root_service)) { - ret = OB_ERR_SYS; - LOG_WARN("error sys, root service must not be nullptr", K(ret)); - } else if (OB_UNLIKELY(snapshot_version < 0)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), K(snapshot_version)); - } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_HOLD_SNAPSHOT_FAILED))) { - LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); - } else if (OB_FAIL(snapshot_scn.convert_for_tx(snapshot_version))) { - LOG_WARN("failed to convert", K(snapshot_version), K(ret)); - } else if (OB_FAIL(schema_service.get_tenant_schema_guard(tenant_id_, schema_guard))) { - LOG_WARN("get tenant schema guard failed", K(ret)); - } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, object_id_, data_table_schema))) { - LOG_WARN("get table schema failed", K(ret), K(object_id_)); - } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, target_object_id_, dest_table_schema))) { - LOG_WARN("get table schema failed", K(ret), K(target_object_id_)); - } else if (OB_ISNULL(data_table_schema) || OB_ISNULL(dest_table_schema)) { - ret = OB_TABLE_NOT_EXIST; - LOG_WARN("table not exist", K(ret), K(object_id_), K(target_object_id_), KP(data_table_schema), KP(dest_table_schema)); - } else if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, object_id_, tablet_ids))) { - LOG_WARN("failed to get data table snapshot", K(ret)); - } else if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, target_object_id_, tablet_ids))) { - LOG_WARN("failed to get dest table snapshot", K(ret)); - } else if (data_table_schema->get_aux_lob_meta_tid() != OB_INVALID_ID && - OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, data_table_schema->get_aux_lob_meta_tid(), tablet_ids))) { - LOG_WARN("failed to get data lob meta table snapshot", 
K(ret)); - } else if (data_table_schema->get_aux_lob_piece_tid() != OB_INVALID_ID && - OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, data_table_schema->get_aux_lob_piece_tid(), tablet_ids))) { - LOG_WARN("failed to get data lob piece table snapshot", K(ret)); - } else if (dest_table_schema->get_aux_lob_meta_tid() != OB_INVALID_ID && - OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, dest_table_schema->get_aux_lob_meta_tid(), tablet_ids))) { - LOG_WARN("failed to get dest lob meta table snapshot", K(ret)); - } else if (dest_table_schema->get_aux_lob_piece_tid() != OB_INVALID_ID && - OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, dest_table_schema->get_aux_lob_piece_tid(), tablet_ids))) { - LOG_WARN("failed to get dest lob piece table snapshot", K(ret)); - } else { - ObDDLService &ddl_service = root_service->get_ddl_service(); - if (OB_FAIL(ddl_service.get_snapshot_mgr().batch_acquire_snapshot( - ddl_service.get_sql_proxy(), SNAPSHOT_FOR_DDL, tenant_id_, schema_version_, snapshot_scn, nullptr, tablet_ids))) { - LOG_WARN("batch acquire snapshot failed", K(ret), K(tablet_ids)); - } - } - add_event_info("hold snapshot finish"); - LOG_INFO("hold snapshot finished", K(ret), K(snapshot_version), K(object_id_), K(target_object_id_), K(schema_version_), "ddl_event_info", ObDDLEventInfo()); - return ret; -} - -int ObDDLRedefinitionTask::release_snapshot(const int64_t snapshot_version) -{ - int ret = OB_SUCCESS; - ObRootService *root_service = GCTX.root_service_; - ObSEArray tablet_ids; - ObSchemaGetterGuard schema_guard; - const ObTableSchema *data_table_schema = nullptr; - const ObTableSchema *dest_table_schema = nullptr; - ObMultiVersionSchemaService &schema_service = ObMultiVersionSchemaService::get_instance(); - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", K(ret)); - } else if (OB_ISNULL(root_service)) { - ret = OB_ERR_SYS; - LOG_WARN("error sys, root service must not be nullptr", K(ret)); - } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, 
DDL_TASK_RELEASE_SNAPSHOT_FAILED))) { - LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); - } else if (OB_FAIL(schema_service.get_tenant_schema_guard(tenant_id_, schema_guard))) { - LOG_WARN("get tenant schema guard failed", K(ret)); - } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, object_id_, data_table_schema))) { - LOG_WARN("get table schema failed", K(ret), K(object_id_)); - } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, target_object_id_, dest_table_schema))) { - LOG_WARN("get table schema failed", K(ret), K(target_object_id_)); - } else if (OB_ISNULL(data_table_schema) || OB_ISNULL(dest_table_schema)) { - ret = OB_TABLE_NOT_EXIST; - LOG_WARN("table not exist", K(ret), K(object_id_), K(target_object_id_), KP(data_table_schema), KP(dest_table_schema)); - } else if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, object_id_, tablet_ids))) { - LOG_WARN("failed to get data table snapshot", K(ret)); - } else if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, target_object_id_, tablet_ids))) { - LOG_WARN("failed to get dest table snapshot", K(ret)); - } else if (data_table_schema->get_aux_lob_meta_tid() != OB_INVALID_ID && - OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, data_table_schema->get_aux_lob_meta_tid(), tablet_ids))) { - LOG_WARN("failed to get data lob meta table snapshot", K(ret)); - } else if (data_table_schema->get_aux_lob_piece_tid() != OB_INVALID_ID && - OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, data_table_schema->get_aux_lob_piece_tid(), tablet_ids))) { - LOG_WARN("failed to get data lob piece table snapshot", K(ret)); - } else if (dest_table_schema->get_aux_lob_meta_tid() != OB_INVALID_ID && - OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, dest_table_schema->get_aux_lob_meta_tid(), tablet_ids))) { - LOG_WARN("failed to get dest lob meta table snapshot", K(ret)); - } else if (dest_table_schema->get_aux_lob_piece_tid() != OB_INVALID_ID && - OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, 
dest_table_schema->get_aux_lob_piece_tid(), tablet_ids))) { - LOG_WARN("failed to get dest lob piece table snapshot", K(ret)); - } else if (OB_FAIL(batch_release_snapshot(snapshot_version, tablet_ids))) { - LOG_WARN("failed to release snapshot", K(ret)); - } - add_event_info("release snapshot finish"); - LOG_INFO("release snapshot finished", K(ret), K(snapshot_version), K(object_id_), K(target_object_id_), K(schema_version_), "ddl_event_info", ObDDLEventInfo()); - return ret; -} - // to hold snapshot, containing data in old table with new schema version. int ObDDLRedefinitionTask::obtain_snapshot(const ObDDLTaskStatus next_task_status) { int ret = OB_SUCCESS; - int tmp_ret = OB_SUCCESS; - ObDDLTaskStatus new_status = ObDDLTaskStatus::OBTAIN_SNAPSHOT; - ObRootService *root_service = GCTX.root_service_; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObDDLRedefinitionTask has not been inited", K(ret)); - } else if (OB_ISNULL(root_service)) { - ret = OB_ERR_SYS; - LOG_WARN("error sys, root service must not be nullptr", K(ret)); } else if (snapshot_version_ > 0 && snapshot_held_) { // do nothing, already hold snapshot. - } else if (!wait_trans_ctx_.is_inited()) { - if (OB_FAIL(wait_trans_ctx_.init(tenant_id_, task_id_, object_id_, ObDDLWaitTransEndCtx::WAIT_SCHEMA_TRANS, schema_version_))) { - LOG_WARN("fail to init wait trans ctx", K(ret)); - } + } else if (OB_FAIL(ObDDLUtil::obtain_snapshot(next_task_status, object_id_, + target_object_id_, snapshot_version_, + snapshot_held_, this))) { + LOG_WARN("fail to obtain_snapshot", K(ret), K(snapshot_version_), K(snapshot_held_)); } - // to get snapshot version. 
- if (OB_SUCC(ret) && snapshot_version_ <= 0) { - bool is_trans_end = false; - const bool need_wait_trans_end = false; - if (OB_FAIL(wait_trans_ctx_.try_wait(is_trans_end, snapshot_version_, need_wait_trans_end))) { - LOG_WARN("just to get snapshot rather than wait trans end", K(ret)); - } - } - DEBUG_SYNC(DDL_REDEFINITION_HOLD_SNAPSHOT); - // try hold snapshot - if (OB_FAIL(ret)) { - } else if (snapshot_version_ <= 0) { - // the snapshot version obtained here must be valid. - ret = OB_ERR_UNEXPECTED; - LOG_WARN("snapshot version is invalid", K(ret), K(tenant_id_), K(object_id_), K(schema_version_)); - } else if (snapshot_version_ > 0 && !snapshot_held_) { - if (OB_FAIL(ObDDLTaskRecordOperator::update_snapshot_version(root_service->get_sql_proxy(), - tenant_id_, - task_id_, - snapshot_version_))) { - LOG_WARN("update snapshot version failed", K(ret), K(task_id_)); - } else if (OB_FAIL(hold_snapshot(snapshot_version_))) { - if (OB_SNAPSHOT_DISCARDED == ret) { - snapshot_version_ = 0; - snapshot_held_ = false; - wait_trans_ctx_.reset(); - } else { - LOG_WARN("hold snapshot version failed", K(ret)); - } - } else { - snapshot_held_ = true; - } - } - - if (OB_FAIL(ret)) { - if (OB_SNAPSHOT_DISCARDED == ret) { - ret = OB_SUCCESS; - } else { - LOG_WARN("fail to obtain snapshot version", K(ret)); - } - } else { - new_status = next_task_status; - } - if (new_status == next_task_status || OB_FAIL(ret)) { - if (OB_FAIL(switch_status(new_status, true, ret))) { - LOG_WARN("fail to switch task status", K(ret)); - } - } - add_event_info("obtain snapshot finish"); - LOG_INFO("obtain snapshot", K(ret), K(snapshot_version_), K(object_id_), K(target_object_id_), K(schema_version_), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -748,7 +582,7 @@ int ObDDLRedefinitionTask::check_build_single_replica(bool &is_end) if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObDDLRedefinitionTask has not been inited", K(ret)); - } else if 
(OB_FAIL(replica_builder_.check_build_end(is_end, complete_sstable_job_ret_code_))) { + } else if (OB_FAIL(replica_builder_.check_build_end(true/*need check sum*/, is_end, complete_sstable_job_ret_code_))) { LOG_WARN("fail to check build end", K(ret)); } else if (!is_end) { if (sstable_complete_request_time_ + ObDDLUtil::calc_inner_sql_execute_timeout() < ObTimeUtility::current_time()) { // timeout, retry @@ -1383,7 +1217,7 @@ int ObDDLRedefinitionTask::finish() LOG_WARN("error sys, root service must not be nullptr", K(ret)); } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_FINISH_FAILED))) { LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); - } else if (snapshot_version_ > 0 && OB_FAIL(release_snapshot(snapshot_version_))) { + } else if (snapshot_version_ > 0 && OB_FAIL(ObDDLUtil::release_snapshot(this, object_id_, target_object_id_, snapshot_version_))) { LOG_WARN("release snapshot failed", K(ret)); } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard(tenant_id_, schema_guard))) { LOG_WARN("get schema guard failed", K(ret), K(tenant_id_)); @@ -2850,117 +2684,12 @@ int ObDDLRedefinitionTask::check_and_cancel_complement_data_dag(bool &all_comple { int ret = OB_SUCCESS; all_complement_dag_exit = false; - const bool force_renew = true; - bool is_cache_hit = false; - const int64_t expire_renew_time = force_renew ? 
INT64_MAX : 0; - share::ObLocationService *location_service = GCTX.location_service_; - ObRootService *root_service = GCTX.root_service_; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); - } else if (OB_ISNULL(location_service) || OB_ISNULL(root_service)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null", K(ret), KP(location_service), KP(root_service)); - } else if (OB_UNLIKELY(!check_dag_exit_tablets_map_.created())) { - const int64_t CHECK_DAG_EXIT_BUCKET_NUM = 64; - common::ObArray src_tablet_ids; - common::ObArray dst_tablet_ids; - if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, object_id_, src_tablet_ids))) { - LOG_WARN("fail to get tablets", K(ret), K(tenant_id_), K(object_id_)); - } else if (OB_FAIL(ObDDLUtil::get_tablets(dst_tenant_id_, target_object_id_, dst_tablet_ids))) { - LOG_WARN("fail to get tablets", K(ret), K(dst_tenant_id_), K(target_object_id_)); - } else if (OB_FAIL(check_dag_exit_tablets_map_.create(CHECK_DAG_EXIT_BUCKET_NUM, lib::ObLabel("DDLChkDagMap")))) { - LOG_WARN("create hashset set failed", K(ret)); - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < src_tablet_ids.count(); i++) { - if (OB_FAIL(check_dag_exit_tablets_map_.set_refactored(src_tablet_ids.at(i), dst_tablet_ids.at(i)))) { - LOG_WARN("set refactored failed", K(ret)); - } - } - } - } - if (OB_SUCC(ret)) { - int saved_ret = OB_SUCCESS; - ObAddr unused_leader_addr; - const int64_t timeout_us = ObDDLUtil::get_default_ddl_rpc_timeout(); - common::hash::ObHashMap ::const_iterator iter = - check_dag_exit_tablets_map_.begin(); - ObArray dag_not_exist_tablets; - for (; OB_SUCC(ret) && iter != check_dag_exit_tablets_map_.end(); iter++) { - ObLSID src_ls_id; - ObLSID dst_ls_id; - const common::ObTabletID &src_tablet_id = iter->first; - const common::ObTabletID &dst_tablet_id = iter->second; - int64_t paxos_member_count = 0; - common::ObArray paxos_server_list; - if (OB_FAIL(ObDDLUtil::get_tablet_leader_addr(location_service, tenant_id_, 
src_tablet_id, timeout_us, src_ls_id, unused_leader_addr))) { - LOG_WARN("get src tablet leader addr failed", K(ret)); - } else if (OB_FAIL(ObDDLUtil::get_tablet_leader_addr(location_service, dst_tenant_id_, dst_tablet_id, timeout_us, dst_ls_id, unused_leader_addr))) { - LOG_WARN("get dst tablet leader addr failed", K(ret)); - } else if (OB_FAIL(ObDDLUtil::get_tablet_paxos_member_list(dst_tenant_id_, dst_tablet_id, paxos_server_list, paxos_member_count))) { - LOG_WARN("get tablet paxos member list failed", K(ret)); - } else { - bool is_tablet_dag_exist = false; - obrpc::ObDDLBuildSingleReplicaRequestArg arg; - arg.ls_id_ = src_ls_id; - arg.dest_ls_id_ = dst_ls_id; - arg.tenant_id_ = tenant_id_; - arg.dest_tenant_id_ = dst_tenant_id_; - arg.source_tablet_id_ = src_tablet_id; - arg.dest_tablet_id_ = dst_tablet_id; - arg.source_table_id_ = object_id_; - arg.dest_schema_id_ = target_object_id_; - arg.schema_version_ = schema_version_; - arg.dest_schema_version_ = dst_schema_version_; - arg.snapshot_version_ = 1; // to ensure arg valid only. - arg.ddl_type_ = task_type_; - arg.task_id_ = task_id_; - arg.parallelism_ = 1; // to ensure arg valid only. - arg.execution_id_ = 1; // to ensure arg valid only. - arg.data_format_version_ = 1; // to ensure arg valid only. - arg.tablet_task_id_ = 1; // to ensure arg valid only. - arg.consumer_group_id_ = 0; // to ensure arg valid only. - for (int64_t j = 0; OB_SUCC(ret) && j < paxos_server_list.count(); j++) { - int tmp_ret = OB_SUCCESS; - obrpc::Bool is_replica_dag_exist(true); - if (OB_TMP_FAIL(root_service->get_rpc_proxy().to(paxos_server_list.at(j)) - .by(dst_tenant_id_).timeout(timeout_us).check_and_cancel_ddl_complement_dag(arg, is_replica_dag_exist))) { - // consider as dag does exist in this server. - saved_ret = OB_SUCC(saved_ret) ? 
tmp_ret : saved_ret; - is_tablet_dag_exist = true; - LOG_WARN("check and cancel ddl complement dag failed", K(ret), K(tmp_ret), K(arg)); - } else if (is_replica_dag_exist) { - is_tablet_dag_exist = true; - if (REACH_COUNT_INTERVAL(1000L)) { - LOG_INFO("wait dag exist", "addr", paxos_server_list.at(j), K(arg)); - } - } - } - if (OB_SUCC(ret) && !is_tablet_dag_exist) { - if (OB_FAIL(dag_not_exist_tablets.push_back(src_tablet_id))) { - LOG_WARN("push back failed", K(ret)); - } - } - } - } - if (OB_SUCC(ret)) { - for (int64_t j = 0; OB_SUCC(ret) && j < dag_not_exist_tablets.count(); j++) { - if (OB_FAIL(check_dag_exit_tablets_map_.erase_refactored(dag_not_exist_tablets.at(j)))) { - LOG_WARN("erase failed", K(ret)); - } - } - ret = OB_SUCC(ret) ? saved_ret : ret; - } - } - if (OB_SUCC(ret)) { - all_complement_dag_exit = check_dag_exit_tablets_map_.empty() ? true : false; - delay_schedule_time_ = 3000L * 1000L; // 3s, to avoid sending too many rpcs to the same replica frequently if retry. - } else if (OB_TABLE_NOT_EXIST == ret - || OB_TENANT_HAS_BEEN_DROPPED == ret - || OB_TENANT_NOT_EXIST == ret - || (++check_dag_exit_retry_cnt_ >= 10 /*MAX RETRY COUNT IF FAILED*/)) { - ret = OB_SUCCESS; - all_complement_dag_exit = true; + } else if (OB_FAIL(ObDDLUtil::check_and_cancel_single_replica_dag(this, object_id_, target_object_id_, + check_dag_exit_tablets_map_, check_dag_exit_retry_cnt_, true /*is_complement_data_dag*/, all_complement_dag_exit))) { + LOG_WARN("failed to check and cancel complement data dag", K(ret)); } return ret; } diff --git a/src/rootserver/ddl_task/ob_ddl_redefinition_task.h b/src/rootserver/ddl_task/ob_ddl_redefinition_task.h index ed98b672cd..ce517a62a4 100644 --- a/src/rootserver/ddl_task/ob_ddl_redefinition_task.h +++ b/src/rootserver/ddl_task/ob_ddl_redefinition_task.h @@ -174,8 +174,6 @@ protected: virtual int check_and_cancel_complement_data_dag(bool &all_complement_dag_exit); // wait dag exit before unlock table. 
virtual int fail(); virtual int success(); - int hold_snapshot(const int64_t snapshot_version); - int release_snapshot(const int64_t snapshot_version); int add_constraint_ddl_task(const int64_t constraint_id); int add_fk_ddl_task(const int64_t fk_id); int sync_auto_increment_position(); diff --git a/src/rootserver/ddl_task/ob_ddl_scheduler.cpp b/src/rootserver/ddl_task/ob_ddl_scheduler.cpp index d39f90d3ee..25d92bb397 100755 --- a/src/rootserver/ddl_task/ob_ddl_scheduler.cpp +++ b/src/rootserver/ddl_task/ob_ddl_scheduler.cpp @@ -23,6 +23,7 @@ #include "rootserver/ddl_task/ob_ddl_task.h" #include "rootserver/ddl_task/ob_drop_index_task.h" #include "rootserver/ddl_task/ob_drop_fts_index_task.h" +#include "rootserver/ddl_task/ob_drop_vec_index_task.h" #include "rootserver/ddl_task/ob_drop_primary_key_task.h" #include "rootserver/ddl_task/ob_index_build_task.h" #include "rootserver/ddl_task/ob_build_mview_task.h" @@ -31,6 +32,8 @@ #include "rootserver/ddl_task/ob_table_redefinition_task.h" #include "rootserver/ddl_task/ob_recover_restore_table_task.h" #include "rootserver/ddl_task/ob_build_mview_task.h" +#include "rootserver/ddl_task/ob_rebuild_index_task.h" +#include "rootserver/ddl_task/ob_vec_index_build_task.h" #include "share/ob_ddl_common.h" #include "share/ob_rpc_struct.h" #include "share/longops_mgr/ob_longops_mgr.h" @@ -973,6 +976,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam ¶m, const obrpc::ObAlterTableArg *alter_table_arg = nullptr; const obrpc::ObCreateIndexArg *create_index_arg = nullptr; const obrpc::ObDropIndexArg *drop_index_arg = nullptr; + const obrpc::ObRebuildIndexArg *rebuild_index_arg = nullptr; const obrpc::ObMViewCompleteRefreshArg *mview_complete_refresh_arg = nullptr; ObRootService *root_service = GCTX.root_service_; LOG_INFO("create ddl task", K(param)); @@ -1032,6 +1036,21 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam ¶m, LOG_WARN("fail to create build fts index task", K(ret)); } break; + case 
DDL_CREATE_VEC_INDEX: + create_index_arg = static_cast(param.ddl_arg_); + if (OB_FAIL(create_build_vec_index_task(proxy, + param.src_table_schema_, + param.dest_table_schema_, + param.parallelism_, + param.parent_task_id_, + param.consumer_group_id_, + create_index_arg, + param.tenant_data_version_, + *param.allocator_, + task_record))) { + LOG_WARN("fail to create build vec index task", K(ret)); + } + break; case DDL_DROP_INDEX: case DDL_DROP_MLOG: // in this case, src_table_schema is data table, dest_table_schema is index table @@ -1062,6 +1081,23 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam ¶m, LOG_WARN("fail to create drop fts index task", K(ret)); } break; + case DDL_DROP_VEC_INDEX: + drop_index_arg = static_cast(param.ddl_arg_); + if (OB_FAIL(create_drop_vec_index_task(proxy, + param.src_table_schema_, + param.schema_version_, + param.consumer_group_id_, + param.vec_vid_rowkey_schema_, + param.vec_rowkey_vid_schema_, + param.vec_index_id_schema_, + param.vec_snapshot_data_schema_, + param.tenant_data_version_, + drop_index_arg, + *param.allocator_, + task_record))) { + LOG_WARN("fail to create drop vec index task", K(ret)); + } + break; case DDL_MODIFY_COLUMN: case DDL_ADD_PRIMARY_KEY: case DDL_ALTER_PRIMARY_KEY: @@ -1090,6 +1126,22 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam ¶m, LOG_WARN("fail to create table redefinition task", K(ret)); } break; + case DDL_REBUILD_INDEX: + rebuild_index_arg = static_cast(param.ddl_arg_); + if (OB_FAIL(create_rebuild_index_task(proxy, + param.type_, + param.src_table_schema_, + param.parallelism_, + param.parent_task_id_, + param.consumer_group_id_, + param.sub_task_trace_id_, + rebuild_index_arg, + param.tenant_data_version_, + *param.allocator_, + task_record))) { + LOG_WARN("fail to create rebuild index task", KR(ret)); + } + break; case DDL_CREATE_MVIEW: mview_complete_refresh_arg = static_cast(param.ddl_arg_); if (OB_FAIL(create_build_mview_task(proxy, @@ -1593,6 +1645,52 @@ 
int ObDDLScheduler::create_build_fts_index_task( return ret; } +int ObDDLScheduler::create_build_vec_index_task( + common::ObISQLClient &proxy, + const ObTableSchema *data_table_schema, + const ObTableSchema *index_schema, + const int64_t parallelism, + const int64_t parent_task_id, + const int64_t consumer_group_id, + const obrpc::ObCreateIndexArg *create_index_arg, + const uint64_t tenant_data_version, + ObIAllocator &allocator, + ObDDLTaskRecord &task_record) +{ + int ret = OB_SUCCESS; + int64_t task_id = 0; + SMART_VAR(ObVecIndexBuildTask, index_task) { + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(create_index_arg) || OB_ISNULL(data_table_schema) + || OB_ISNULL(index_schema) || OB_UNLIKELY(tenant_data_version <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(create_index_arg), + KPC(data_table_schema), KPC(index_schema)); + } else if (OB_FAIL(ObDDLTask::fetch_new_task_id(root_service_->get_sql_proxy(), data_table_schema->get_tenant_id(), task_id))) { + LOG_WARN("fetch new task id failed", K(ret)); + } else if (OB_FAIL(index_task.init(data_table_schema->get_tenant_id(), + task_id, + data_table_schema, + index_schema, + data_table_schema->get_schema_version(), + parallelism, + consumer_group_id, + *create_index_arg, + tenant_data_version, + parent_task_id))) { + LOG_WARN("init fts index task failed", K(ret), K(data_table_schema), K(index_schema)); + } else if (OB_FAIL(index_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { + LOG_WARN("set trace id failed", K(ret)); + } else if (OB_FAIL(insert_task_record(proxy, index_task, allocator, task_record))) { + LOG_WARN("fail to insert task record", K(ret)); + } + LOG_INFO("ddl_scheduler create build vec index task finished", K(ret), K(index_task)); + } + return ret; +} + int ObDDLScheduler::create_build_index_task( common::ObISQLClient &proxy, const share::ObDDLType &ddl_type, @@ -1775,6 +1873,101 @@ int 
ObDDLScheduler::create_drop_fts_index_task( return ret; } + +int ObDDLScheduler::create_drop_vec_index_task( + common::ObISQLClient &proxy, + const share::schema::ObTableSchema *index_schema, + const int64_t schema_version, + const int64_t consumer_group_id, + const share::schema::ObTableSchema *vid_rowkey_schema, + const share::schema::ObTableSchema *rowkey_vid_schema, + const share::schema::ObTableSchema *index_id_schema, + const share::schema::ObTableSchema *snapshot_data_schema, + const uint64_t tenant_data_version, + const obrpc::ObDropIndexArg *drop_index_arg, + ObIAllocator &allocator, + ObDDLTaskRecord &task_record) +{ + int ret = OB_SUCCESS; + int64_t task_id = 0; + ObDropVecIndexTask index_task; + common::ObString vec_domain_index_name; + common::ObString vec_vid_rowkey_name; + common::ObString vec_rowkey_vid_name; + common::ObString vec_index_id_name; + common::ObString vec_snapshot_data_name; + + // multivalue index may run here, need calc index type first + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(index_schema), KP(drop_index_arg)); + } else if (OB_UNLIKELY(schema_version <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(index_schema), K(schema_version)); + } else if (OB_FAIL(ObDDLTask::fetch_new_task_id(root_service_->get_sql_proxy(), index_schema->get_tenant_id(), task_id))) { + LOG_WARN("fetch new task id failed", K(ret)); + } else if (OB_FAIL(index_schema->get_index_name(vec_domain_index_name))) { + LOG_WARN("fail to get vec index name", K(ret), KPC(index_schema)); + } else { + if (OB_FAIL(ret) || OB_ISNULL(vid_rowkey_schema)) { + } else if (OB_FAIL(vid_rowkey_schema->get_index_name(vec_vid_rowkey_name))) { + LOG_WARN("fail to get vid rowkey name", K(ret), KPC(vid_rowkey_schema)); + } + if (OB_FAIL(ret) || OB_ISNULL(rowkey_vid_schema)) { + } else if 
(OB_FAIL(rowkey_vid_schema->get_index_name(vec_rowkey_vid_name))) { + LOG_WARN("fail to get rowkey vid name", K(ret), KPC(rowkey_vid_schema)); + } + if (OB_FAIL(ret) || OB_ISNULL(index_id_schema)) { + } else if (OB_FAIL(index_id_schema->get_index_name(vec_index_id_name))) { + LOG_WARN("fail to get index id name", K(ret), KPC(index_id_schema)); + } + if (OB_FAIL(ret) || OB_ISNULL(snapshot_data_schema)) { + } else if (OB_FAIL(snapshot_data_schema->get_index_name(vec_snapshot_data_name))) { + LOG_WARN("fail to get snapshot data name", K(ret), KPC(snapshot_data_schema)); + } + + const int64_t init_task_id = 0; + const uint64_t data_table_id = index_schema->get_data_table_id(); + + uint64_t vid_rowkey_table_id = OB_ISNULL(vid_rowkey_schema) ? OB_INVALID_ID : vid_rowkey_schema->get_table_id(); + uint64_t rowkey_vid_table_id = OB_ISNULL(rowkey_vid_schema) ? OB_INVALID_ID : rowkey_vid_schema->get_table_id(); + uint64_t index_id_table_id = OB_ISNULL(index_id_schema) ? OB_INVALID_ID : index_id_schema->get_table_id(); + uint64_t snapshot_data_table_id = OB_ISNULL(snapshot_data_schema) ? 
OB_INVALID_ID : snapshot_data_schema->get_table_id(); + + const ObVecIndexDDLChildTaskInfo domain_index(vec_domain_index_name, index_schema->get_table_id(), init_task_id); + const ObVecIndexDDLChildTaskInfo vid_rowkey(vec_vid_rowkey_name, vid_rowkey_table_id, init_task_id); + const ObVecIndexDDLChildTaskInfo rowkey_vid(vec_rowkey_vid_name, rowkey_vid_table_id, init_task_id); + const ObVecIndexDDLChildTaskInfo index_id(vec_index_id_name, index_id_table_id, init_task_id); + const ObVecIndexDDLChildTaskInfo snapshot_data(vec_snapshot_data_name, snapshot_data_table_id, init_task_id); + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(index_task.init(index_schema->get_tenant_id(), + task_id, + data_table_id, + DDL_DROP_VEC_INDEX, + rowkey_vid, + vid_rowkey, + domain_index, + index_id, + snapshot_data, + schema_version, + consumer_group_id, + tenant_data_version, + *drop_index_arg))) { + LOG_WARN("init drop index task failed", K(ret), K(data_table_id), K(domain_index)); + } else if (OB_FAIL(index_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { + LOG_WARN("set trace id failed", K(ret)); + } else if (OB_FAIL(insert_task_record(proxy, index_task, allocator, task_record))) { + LOG_WARN("fail to insert task record", K(ret)); + } + } + LOG_INFO("ddl_scheduler create drop vec index task finished", K(ret), K(index_task)); + return ret; +} + int ObDDLScheduler::create_constraint_task( common::ObISQLClient &proxy, const share::schema::ObTableSchema *table_schema, @@ -2246,10 +2439,19 @@ int ObDDLScheduler::schedule_ddl_task(const ObDDLTaskRecord &record) case ObDDLType::DDL_CREATE_FTS_INDEX: ret = schedule_build_fts_index_task(record); break; + case ObDDLType::DDL_DROP_VEC_INDEX: + ret = schedule_drop_vec_index_task(record); + break; + case ObDDLType::DDL_CREATE_VEC_INDEX: + ret = schedule_build_vec_index_task(record); + break; case ObDDLType::DDL_DROP_FTS_INDEX: case ObDDLType::DDL_DROP_MULVALUE_INDEX: ret = schedule_drop_fts_index_task(record); break; + case 
ObDDLType::DDL_REBUILD_INDEX: + ret = schedule_rebuild_index_task(record); + break; case DDL_DROP_PRIMARY_KEY: ret = schedule_drop_primary_key_task(record); break; @@ -2337,6 +2539,33 @@ int ObDDLScheduler::schedule_build_fts_index_task( return ret; } +int ObDDLScheduler::schedule_build_vec_index_task( + const ObDDLTaskRecord &task_record) +{ + int ret = OB_SUCCESS; + ObVecIndexBuildTask *build_index_task = nullptr; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(alloc_ddl_task(build_index_task))) { + LOG_WARN("alloc ddl task failed", K(ret)); + } else if (OB_FAIL(build_index_task->init(task_record))) { + LOG_WARN("init global_index_task failed", K(ret), K(task_record)); + } else if (OB_FAIL(build_index_task->set_trace_id(task_record.trace_id_))) { + LOG_WARN("init build index task failed", K(ret)); + } else if (OB_FAIL(inner_schedule_ddl_task(build_index_task, task_record))) { + if (OB_ENTRY_EXIST != ret) { + LOG_WARN("inner schedule task failed", K(ret), K(*build_index_task)); + } + } + if (OB_FAIL(ret) && nullptr != build_index_task) { + build_index_task->~ObVecIndexBuildTask(); + allocator_.free(build_index_task); + build_index_task = nullptr; + } + return ret; +} + int ObDDLScheduler::schedule_build_index_task( const ObDDLTaskRecord &task_record) { @@ -2369,6 +2598,64 @@ int ObDDLScheduler::schedule_build_index_task( return ret; } +int ObDDLScheduler::create_rebuild_index_task( + common::ObISQLClient &proxy, + const share::ObDDLType &ddl_type, + const ObTableSchema *index_schema, + const int64_t parallelism, + const int64_t parent_task_id, + const int64_t consumer_group_id, + const int32_t sub_task_trace_id, + const obrpc::ObRebuildIndexArg *rebuild_index_arg, + const uint64_t tenant_data_version, + ObIAllocator &allocator, + ObDDLTaskRecord &task_record) +{ + int ret = OB_SUCCESS; + int64_t task_id = 0; + SMART_VAR(ObRebuildIndexTask, index_task){ + if (OB_UNLIKELY(!is_inited_)) { + ret = 
OB_NOT_INIT; + LOG_WARN("not init", KR(ret)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), KP(index_schema)); + } else if (!index_schema->is_vec_index()) { // current only support vector index ddl rebuild task + ret = OB_NOT_SUPPORTED; + LOG_WARN("rebuild domain index is not supported", KR(ret), KPC(index_schema)); + } else if (index_schema->is_built_in_vec_index()) { // 期望是可见性表发起的重建(3号表) + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected index schema", KR(ret), KPC(index_schema)); + } else if (OB_FAIL(ObDDLTask::fetch_new_task_id(root_service_->get_sql_proxy(), index_schema->get_tenant_id(), task_id))) { + LOG_WARN("fetch new task id failed", KR(ret)); + } else { + const uint64_t data_table_id = index_schema->get_data_table_id(); + const uint64_t index_table_id = index_schema->get_table_id(); + if (OB_FAIL(index_task.init(index_schema->get_tenant_id(), + task_id, + ddl_type, + data_table_id, + index_table_id, + index_schema->get_schema_version(), + parent_task_id, + consumer_group_id, + sub_task_trace_id, + parallelism, + tenant_data_version, + *rebuild_index_arg))) { + LOG_WARN("init drop index task failed", KR(ret), K(data_table_id), K(index_table_id)); + } else if (OB_FAIL(index_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { + LOG_WARN("set trace id failed", KR(ret)); + } else if (OB_FAIL(insert_task_record(proxy, index_task, allocator, task_record))) { + LOG_WARN("fail to insert task record", KR(ret)); + } + } + } // end smart var + LOG_INFO("ddl_scheduler create rebuild index task finished", KR(ret), "ddl_event_info", ObDDLEventInfo(), K(task_record)); + return ret; +} + + int ObDDLScheduler::schedule_drop_primary_key_task(const ObDDLTaskRecord &task_record) { int ret = OB_SUCCESS; @@ -2573,6 +2860,36 @@ int ObDDLScheduler::schedule_drop_index_task(const ObDDLTaskRecord &task_record) return ret; } +int ObDDLScheduler::schedule_drop_vec_index_task(const ObDDLTaskRecord &task_record) +{ + int 
ret = OB_SUCCESS; + ObDropVecIndexTask *drop_vec_index_task = nullptr; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDDLScheduler has not been inited", K(ret)); + } else if (OB_FAIL(alloc_ddl_task(drop_vec_index_task))) { + LOG_WARN("fail to alloc drop fts index task", K(ret)); + } else if (OB_ISNULL(drop_vec_index_task)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null pointer of drop_vec_index_task", K(ret)); + } else if (OB_FAIL(drop_vec_index_task->init(task_record))) { + LOG_WARN("fail to init drop fts index task", K(ret)); + } else if (OB_FAIL(drop_vec_index_task->set_trace_id(task_record.trace_id_))) { + LOG_WARN("fail to set trace id", K(ret)); + } else if (OB_FAIL(inner_schedule_ddl_task(drop_vec_index_task, task_record))) { + if (OB_ENTRY_EXIST != ret) { + LOG_WARN("fail to inner schedule task", K(ret)); + } + } + if (OB_FAIL(ret) && nullptr != drop_vec_index_task) { + drop_vec_index_task->~ObDropVecIndexTask(); + allocator_.free(drop_vec_index_task); + drop_vec_index_task = nullptr; + } + + return ret; +} + int ObDDLScheduler::schedule_drop_fts_index_task(const ObDDLTaskRecord &task_record) { int ret = OB_SUCCESS; @@ -2696,6 +3013,35 @@ int ObDDLScheduler::schedule_build_mview_task(const ObDDLTaskRecord &task_record return ret; } +int ObDDLScheduler::schedule_rebuild_index_task(const ObDDLTaskRecord &task_record) +{ + int ret = OB_SUCCESS; + ObRebuildIndexTask *rebuild_index_task = nullptr; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDDLScheduler has not been inited", K(ret)); + } else if (OB_FAIL(alloc_ddl_task(rebuild_index_task))) { + LOG_WARN("alloc ddl task failed", KR(ret)); + } else if (OB_FAIL(rebuild_index_task->init(task_record))) { + LOG_WARN("init drop index task failed", KR(ret)); + } else if (OB_FAIL(rebuild_index_task->set_trace_id(task_record.trace_id_))) { + LOG_WARN("set trace id failed", KR(ret)); + } else if (OB_FAIL(inner_schedule_ddl_task(rebuild_index_task, task_record))) { + if 
(OB_ENTRY_EXIST != ret) { + LOG_WARN("inner schedule task failed", KR(ret)); + } + } + if (nullptr != rebuild_index_task) { + LOG_INFO("ddl_scheduler schedule rebuild index task", KR(ret), "ddl_event_info", ObDDLEventInfo(), K(task_record)); + } + if (OB_FAIL(ret) && nullptr != rebuild_index_task) { + rebuild_index_task->~ObRebuildIndexTask(); + allocator_.free(rebuild_index_task); + rebuild_index_task = nullptr; + } + return ret; +} + int ObDDLScheduler::add_task_to_longops_mgr(ObDDLTask *ddl_task) { int ret = OB_SUCCESS; @@ -2918,6 +3264,11 @@ int ObDDLScheduler::on_sstable_complement_job_reply( LOG_WARN("update complete sstable job status", K(ret), K(tablet_id), K(snapshot_version), K(ret_code)); } break; + case ObDDLType::DDL_DROP_VEC_INDEX: + if (OB_FAIL(static_cast(&task)->update_drop_lob_meta_row_job_status(tablet_id, snapshot_version, execution_id, ret_code, addition_info))) { + LOG_WARN("update complete sstable job status", K(ret), K(tablet_id), K(snapshot_version), K(ret_code)); + } + break; default: ret = OB_NOT_SUPPORTED; LOG_WARN("not supported ddl task", K(ret), K(task)); diff --git a/src/rootserver/ddl_task/ob_ddl_scheduler.h b/src/rootserver/ddl_task/ob_ddl_scheduler.h index 8442639e19..989cac5df2 100755 --- a/src/rootserver/ddl_task/ob_ddl_scheduler.h +++ b/src/rootserver/ddl_task/ob_ddl_scheduler.h @@ -377,6 +377,17 @@ private: const obrpc::ObCreateIndexArg *create_index_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record); + int create_build_vec_index_task( + common::ObISQLClient &proxy, + const share::schema::ObTableSchema *data_table_schema, + const share::schema::ObTableSchema *index_schema, + const int64_t parallelism, + const int64_t parent_task_id, + const int64_t consumer_group_id, + const obrpc::ObCreateIndexArg *create_index_arg, + const uint64_t tenant_data_version, + ObIAllocator &allocator, + ObDDLTaskRecord &task_record); int create_constraint_task( common::ObISQLClient &proxy, const share::schema::ObTableSchema *table_schema, 
@@ -454,6 +465,19 @@ private: ObIAllocator &allocator, ObDDLTaskRecord &task_record); + int create_rebuild_index_task( + common::ObISQLClient &proxy, + const share::ObDDLType &ddl_type, + const ObTableSchema *index_schema, + const int64_t parallelism, + const int64_t parent_task_id, + const int64_t consumer_group_id, + const int32_t sub_task_trace_id, + const obrpc::ObRebuildIndexArg *rebuild_index_arg, + const uint64_t tenant_data_version, + ObIAllocator &allocator, + ObDDLTaskRecord &task_record); + int create_drop_index_task( common::ObISQLClient &proxy, const share::ObDDLType &ddl_type, @@ -476,6 +500,20 @@ private: ObIAllocator &allocator, ObDDLTaskRecord &task_record); + int create_drop_vec_index_task( + common::ObISQLClient &proxy, + const share::schema::ObTableSchema *index_schema, + const int64_t schema_version, + const int64_t consumer_group_id, + const share::schema::ObTableSchema *vid_rowkey_schema_, + const share::schema::ObTableSchema *rowkey_vid_schema_, + const share::schema::ObTableSchema *delta_buffer_schema_, + const share::schema::ObTableSchema *index_snapshot_data_schema_, + const uint64_t tenant_data_version, + const obrpc::ObDropIndexArg *drop_index_arg, + ObIAllocator &allocator, + ObDDLTaskRecord &task_record); + int create_ddl_retry_task( common::ObISQLClient &proxy, const uint64_t tenant_id, @@ -503,6 +541,8 @@ private: ObDDLTaskRecord &task_record); int schedule_build_fts_index_task( + const ObDDLTaskRecord &task_record); + int schedule_build_vec_index_task( const ObDDLTaskRecord &task_record); int schedule_build_index_task( const ObDDLTaskRecord &task_record); @@ -513,6 +553,8 @@ private: int schedule_column_redefinition_task(const ObDDLTaskRecord &task_record); int schedule_modify_autoinc_task(const ObDDLTaskRecord &task_record); int schedule_drop_index_task(const ObDDLTaskRecord &task_record); + int schedule_drop_vec_index_task(const ObDDLTaskRecord &task_record); + int schedule_rebuild_index_task(const ObDDLTaskRecord &task_record); 
int schedule_drop_fts_index_task(const ObDDLTaskRecord &task_record); int schedule_ddl_retry_task(const ObDDLTaskRecord &task_record); int schedule_recover_restore_table_task(const ObDDLTaskRecord &task_record); diff --git a/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp b/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp index f9444aa2b8..1a93e8c856 100644 --- a/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp +++ b/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp @@ -246,7 +246,7 @@ int ObDDLSingleReplicaExecutor::schedule_task() return ret; } -int ObDDLSingleReplicaExecutor::check_build_end(bool &is_end, int64_t &ret_code) +int ObDDLSingleReplicaExecutor::check_build_end(const bool need_checksum, bool &is_end, int64_t &ret_code) { int ret = OB_SUCCESS; is_end = false; @@ -269,12 +269,14 @@ int ObDDLSingleReplicaExecutor::check_build_end(bool &is_end, int64_t &ret_code) succ_cnt += ObPartitionBuildStat::BUILD_SUCCEED == build_infos.at(i).stat_; need_schedule |= build_infos.at(i).need_schedule(); } - if (OB_SUCC(ret) && build_infos.count() == succ_cnt) { + if (OB_SUCC(ret) && build_infos.count() == succ_cnt && need_checksum) { if (OB_FAIL(ObCheckTabletDataComplementOp::check_finish_report_checksum( dest_tenant_id_, dest_table_id_, execution_id_, task_id_))) { LOG_WARN("fail to check sstable checksum_report_finish", K(ret), K(dest_tenant_id_), K(dest_table_id_), K(execution_id_), K(task_id_)); } + } + if (OB_SUCC(ret) && build_infos.count() == succ_cnt) { is_end = true; ret_code = ret; } diff --git a/src/rootserver/ddl_task/ob_ddl_single_replica_executor.h b/src/rootserver/ddl_task/ob_ddl_single_replica_executor.h index 0f87590781..c1ba70bd72 100644 --- a/src/rootserver/ddl_task/ob_ddl_single_replica_executor.h +++ b/src/rootserver/ddl_task/ob_ddl_single_replica_executor.h @@ -76,7 +76,7 @@ class ObDDLSingleReplicaExecutor { public: int build(const ObDDLSingleReplicaExecutorParam ¶m); - int check_build_end(bool &is_end, 
int64_t &ret_code); + int check_build_end(const bool need_checksum, bool &is_end, int64_t &ret_code); int set_partition_task_status(const common::ObTabletID &tablet_id, const int ret_code, const int64_t row_scanned, diff --git a/src/rootserver/ddl_task/ob_ddl_task.cpp b/src/rootserver/ddl_task/ob_ddl_task.cpp index d35853ef69..dea27ced4f 100644 --- a/src/rootserver/ddl_task/ob_ddl_task.cpp +++ b/src/rootserver/ddl_task/ob_ddl_task.cpp @@ -42,6 +42,7 @@ #include "storage/tx/ob_ts_mgr.h" #include "observer/ob_server_struct.h" #include "share/ob_ddl_sim_point.h" +#include "rootserver/ddl_task/ob_rebuild_index_task.h" const bool OB_DDL_TASK_ENABLE_TRACING = false; @@ -178,6 +179,7 @@ ObCreateDDLTaskParam::ObCreateDDLTaskParam() consumer_group_id_(0), parent_task_id_(0), task_id_(0), type_(DDL_INVALID), src_table_schema_(nullptr), dest_table_schema_(nullptr), ddl_arg_(nullptr), allocator_(nullptr), aux_rowkey_doc_schema_(nullptr), aux_doc_rowkey_schema_(nullptr), aux_doc_word_schema_(nullptr), + vec_rowkey_vid_schema_(nullptr), vec_vid_rowkey_schema_(nullptr), vec_index_id_schema_(nullptr), vec_snapshot_data_schema_(nullptr), tenant_data_version_(0), ddl_need_retry_at_executor_(false), is_pre_split_(false) { } @@ -198,7 +200,9 @@ ObCreateDDLTaskParam::ObCreateDDLTaskParam(const uint64_t tenant_id, : sub_task_trace_id_(0), tenant_id_(tenant_id), object_id_(object_id), schema_version_(schema_version), parallelism_(parallelism), consumer_group_id_(consumer_group_id), parent_task_id_(parent_task_id), task_id_(task_id), type_(type), src_table_schema_(src_table_schema), dest_table_schema_(dest_table_schema), ddl_arg_(ddl_arg), allocator_(allocator), aux_rowkey_doc_schema_(nullptr), aux_doc_rowkey_schema_(nullptr), - aux_doc_word_schema_(nullptr), ddl_need_retry_at_executor_(ddl_need_retry_at_executor), is_pre_split_(false) + aux_doc_word_schema_(nullptr), + vec_rowkey_vid_schema_(nullptr), vec_vid_rowkey_schema_(nullptr), vec_index_id_schema_(nullptr), 
vec_snapshot_data_schema_(nullptr), + ddl_need_retry_at_executor_(ddl_need_retry_at_executor), is_pre_split_(false) { } @@ -698,6 +702,26 @@ int ObFTSDDLChildTaskInfo::deep_copy_from_other( OB_SERIALIZE_MEMBER(ObFTSDDLChildTaskInfo, index_name_, table_id_); + +int ObVecIndexDDLChildTaskInfo::deep_copy_from_other( + const ObVecIndexDDLChildTaskInfo &other, + common::ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (this != &other) { + if (OB_FAIL(ob_write_string(allocator, other.index_name_, index_name_))) { + LOG_WARN("fail to copy table name", K(ret), K(other)); + } else { + table_id_ = other.table_id_; + task_id_ = other.task_id_; + } + } + return ret; +} + +OB_SERIALIZE_MEMBER(ObVecIndexDDLChildTaskInfo, index_name_, table_id_); + + int ObDDLTask::cleanup() { int ret = cleanup_impl(); @@ -738,6 +762,13 @@ int ObDDLTask::get_ddl_type_str(const int64_t ddl_type, const char *&ddl_type_st break; case DDL_CREATE_FTS_INDEX: ddl_type_str = "create fts index"; + break; + case DDL_CREATE_VEC_INDEX: + ddl_type_str = "create vec index"; + break; + case DDL_REBUILD_INDEX: + ddl_type_str = "rebuild vec index"; + break; case DDL_CREATE_PARTITIONED_LOCAL_INDEX: ddl_type_str = "create partitioned local index"; break; @@ -1009,6 +1040,16 @@ int64_t ObDDLTask::get_serialize_param_size() const return serialize_field.get_serialize_size(); } +bool ObDDLTask::is_ddl_task_can_be_cancelled() const +{ + bool can_be_cancelled = true; + if (task_type_ == ObDDLType::DDL_DROP_INDEX || + task_type_ == ObDDLType::DDL_DROP_VEC_INDEX) { + can_be_cancelled = false; + } + return can_be_cancelled; +} + int ObDDLTask::convert_to_record( ObDDLTaskRecord &task_record, common::ObIAllocator &allocator) @@ -1095,7 +1136,7 @@ int ObDDLTask::switch_status(const ObDDLTaskStatus new_status, const bool enable } if (is_cancel) { real_ret_code = (OB_SUCCESS == ret_code || error_need_retry) ? 
OB_CANCELED : ret_code; - FLOG_INFO("ddl task is canceled", K(task_id_), K(parent_task_id_), K(object_id_), + FLOG_INFO("ddl task is cancelled", K(task_type_), K(task_id_), K(parent_task_id_), K(object_id_), K(target_object_id_), K(ret_code), K(error_need_retry), K(real_ret_code)); } else if (SUCCESS == old_status || error_need_retry) { LOG_INFO("error code found, but execute again", K(task_id_), K(parent_task_id_), @@ -1319,6 +1360,12 @@ int ObDDLTask::check_ddl_task_is_cancel(const TraceId &trace_id, bool &is_cancel } else if (OB_FAIL(SYS_TASK_STATUS_MGR.is_task_cancel(trace_id, is_cancel))) { LOG_WARN("failed to check task is cancel", K(ret), K(trace_id)); } + if (OB_SUCC(ret) && is_cancel) { + if (!is_ddl_task_can_be_cancelled()) { + is_cancel = false; + LOG_INFO("ddl task can not be cancelled", K(task_type_), K(task_id_)); + } + } return ret; } @@ -3133,37 +3180,116 @@ int ObDDLTaskRecordOperator::check_has_conflict_ddl( return ret; } -int ObDDLTaskRecordOperator::check_has_index_or_mlog_task( - common::ObISQLClient &proxy, +int ObDDLTaskRecordOperator::check_rebuild_vec_index_task_exist( const uint64_t tenant_id, const uint64_t data_table_id, const uint64_t index_table_id, + common::ObISQLClient &proxy, + common::ObIAllocator &allocator, + bool &is_exist) +{ + int ret = OB_SUCCESS; + ObSqlString sql_string; + is_exist = false; + if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || + OB_INVALID_ID == data_table_id || + OB_INVALID_ID == index_table_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(index_table_id), K(tenant_id), K(data_table_id)); + } else if (OB_FAIL(sql_string.assign_fmt(" SELECT object_id, target_object_id, UNHEX(message) as message_unhex FROM %s WHERE ddl_type = %d", + OB_ALL_DDL_TASK_STATUS_TNAME, ObDDLType::DDL_REBUILD_INDEX))) { + LOG_WARN("assign sql string failed", K(ret)); + } else { + LOG_DEBUG("check_rebuild_vec_index_task_exist target id", K(data_table_id), K(index_table_id)); + SMART_VAR(ObMySQLProxy::MySQLResult, 
res) { + ObDDLTaskRecord task_record; + ObString task_message; + sqlclient::ObMySQLResult *result = NULL; + if (OB_INVALID_TENANT_ID == tenant_id) { + if (OB_FAIL(proxy.read(res, sql_string.ptr()))) { + LOG_WARN("query ddl task record failed", K(ret), K(sql_string)); + } + } else { + if (OB_FAIL(proxy.read(res, tenant_id, sql_string.ptr()))) { + LOG_WARN("query ddl task record failed", K(ret), K(sql_string)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(result = res.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get sql result", K(ret), KP(result)); + } else { + while (OB_SUCC(ret) && !is_exist && OB_SUCC(result->next())) { + EXTRACT_INT_FIELD_MYSQL(*result, "object_id", task_record.object_id_, uint64_t); + EXTRACT_INT_FIELD_MYSQL(*result, "target_object_id", task_record.target_object_id_, uint64_t); + EXTRACT_VARCHAR_FIELD_MYSQL(*result, "message_unhex", task_message); + LOG_DEBUG("task record", K(task_record.object_id_), K(task_record.target_object_id_)); + if (OB_SUCC(ret) && !task_message.empty()) { + uint64_t new_index_id = OB_INVALID_ID; + int64_t pos = 0; + SMART_VAR(rootserver::ObRebuildIndexTask, task) { + if (OB_FAIL(task.deserialize_params_from_message(tenant_id, task_message.ptr(), task_message.length(), pos))) { + LOG_WARN("deserialize from msg failed", K(ret)); + } else { + new_index_id = task.get_new_index_id(); + LOG_DEBUG("new index id", K(new_index_id)); + } + } + if (OB_FAIL(ret)) { + } else if ((data_table_id == task_record.object_id_ && index_table_id == new_index_id) || // new or old index match + (data_table_id == task_record.object_id_ && index_table_id == task_record.target_object_id_)) { + is_exist = true; + } + } + } + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + } + } + } + return ret; +} + +int ObDDLTaskRecordOperator::check_has_index_or_mlog_task( + common::ObISQLClient &proxy, + const ObTableSchema &index_schema, + const uint64_t tenant_id, + const uint64_t data_table_id, bool &has_index_task) { int ret 
= OB_SUCCESS; has_index_task = false; + const uint64_t index_table_id = index_schema.get_table_id(); if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || OB_INVALID_ID == data_table_id || OB_INVALID_ID == index_table_id)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(ret), K(tenant_id), K(data_table_id)); } else { - ObSqlString sql_string; - SMART_VAR(ObMySQLProxy::MySQLResult, res) { - sqlclient::ObMySQLResult *result = NULL; - if (OB_FAIL(sql_string.assign_fmt("SELECT EXISTS(SELECT 1 FROM %s WHERE object_id = %lu AND target_object_id = %lu AND ddl_type IN (%d, %d, %d, %d, %d)) as has", - OB_ALL_DDL_TASK_STATUS_TNAME, data_table_id, index_table_id, ObDDLType::DDL_CREATE_INDEX, ObDDLType::DDL_CREATE_PARTITIONED_LOCAL_INDEX, ObDDLType::DDL_DROP_INDEX, - ObDDLType::DDL_CREATE_MLOG, ObDDLType::DDL_DROP_MLOG))) { - LOG_WARN("assign sql string failed", K(ret)); - } else if (OB_FAIL(proxy.read(res, tenant_id, sql_string.ptr()))) { - LOG_WARN("query ddl task record failed", K(ret), K(sql_string)); - } else if (OB_ISNULL(result = res.get_result())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("fail to get sql result", K(ret), KP(result)); - } else if (OB_FAIL(result->next())) { - LOG_WARN("result next failed", K(ret), K(tenant_id), K(index_table_id)); - } else { - EXTRACT_BOOL_FIELD_MYSQL(*result, "has", has_index_task); + if (index_schema.is_vec_index()) { + ObArenaAllocator allocator(ObModIds::OB_SCHEMA); + if (OB_FAIL(check_rebuild_vec_index_task_exist(tenant_id, data_table_id, index_table_id, proxy, allocator, has_index_task))) { + LOG_WARN("fail to check rebuild vec index task", K(ret), K(data_table_id), K(index_table_id)); + } + } else { + ObSqlString sql_string; + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + sqlclient::ObMySQLResult *result = NULL; + if (OB_FAIL(sql_string.assign_fmt("SELECT EXISTS(SELECT 1 FROM %s WHERE object_id = %lu AND target_object_id = %lu AND ddl_type IN (%d, %d, %d, %d, %d)) as has", + OB_ALL_DDL_TASK_STATUS_TNAME, data_table_id, 
index_table_id, ObDDLType::DDL_CREATE_INDEX, ObDDLType::DDL_CREATE_PARTITIONED_LOCAL_INDEX, ObDDLType::DDL_DROP_INDEX, + ObDDLType::DDL_CREATE_MLOG, ObDDLType::DDL_DROP_MLOG))) { + LOG_WARN("assign sql string failed", K(ret)); + } else if (OB_FAIL(proxy.read(res, tenant_id, sql_string.ptr()))) { + LOG_WARN("query ddl task record failed", K(ret), K(sql_string)); + } else if (OB_ISNULL(result = res.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get sql result", K(ret), KP(result)); + } else if (OB_FAIL(result->next())) { + LOG_WARN("result next failed", K(ret), K(tenant_id), K(index_table_id)); + } else { + EXTRACT_BOOL_FIELD_MYSQL(*result, "has", has_index_task); + } } } } diff --git a/src/rootserver/ddl_task/ob_ddl_task.h b/src/rootserver/ddl_task/ob_ddl_task.h index cc39af92e1..060c029887 100755 --- a/src/rootserver/ddl_task/ob_ddl_task.h +++ b/src/rootserver/ddl_task/ob_ddl_task.h @@ -138,6 +138,32 @@ public: int64_t task_id_; }; + +struct ObVecIndexDDLChildTaskInfo final +{ +public: + ObVecIndexDDLChildTaskInfo() : index_name_(), table_id_(OB_INVALID_ID), task_id_(0) {} + ObVecIndexDDLChildTaskInfo( + common::ObString &index_name, + const uint64_t table_id, + const int64_t task_id) + : index_name_(index_name), + table_id_(table_id), + task_id_(task_id) + {} + ~ObVecIndexDDLChildTaskInfo() = default; + bool is_valid() const { return OB_INVALID_ID != table_id_ && !index_name_.empty(); } + int deep_copy_from_other(const ObVecIndexDDLChildTaskInfo &other, common::ObIAllocator &allocator); + TO_STRING_KV(K_(table_id), K_(task_id), K_(index_name)); + OB_UNIS_VERSION(1); +public: + common::ObString index_name_; + uint64_t table_id_; + // The following fields are not persisted to the `__all_ddl_task_status` system table. 
+ int64_t task_id_; +}; + + struct ObDDLTaskSerializeField final { OB_UNIS_VERSION(1); @@ -193,6 +219,7 @@ public: TO_STRING_KV(K_(tenant_id), K_(object_id), K_(schema_version), K_(parallelism), K_(consumer_group_id), K_(parent_task_id), K_(task_id), K_(type), KPC_(src_table_schema), KPC_(dest_table_schema), KPC_(ddl_arg), K_(tenant_data_version), K_(sub_task_trace_id), KPC_(aux_rowkey_doc_schema), KPC_(aux_doc_rowkey_schema), KPC_(aux_doc_word_schema), + K_(vec_rowkey_vid_schema), K_(vec_vid_rowkey_schema), K_(vec_index_id_schema), K_(vec_snapshot_data_schema), K_(ddl_need_retry_at_executor), K_(is_pre_split)); public: int32_t sub_task_trace_id_; @@ -211,6 +238,10 @@ public: const ObTableSchema *aux_rowkey_doc_schema_; const ObTableSchema *aux_doc_rowkey_schema_; const ObTableSchema *aux_doc_word_schema_; + const ObTableSchema *vec_rowkey_vid_schema_; + const ObTableSchema *vec_vid_rowkey_schema_; + const ObTableSchema *vec_index_id_schema_; + const ObTableSchema *vec_snapshot_data_schema_; uint64_t tenant_data_version_; bool ddl_need_retry_at_executor_; bool is_pre_split_; @@ -318,9 +349,9 @@ public: static int check_has_index_or_mlog_task( common::ObISQLClient &proxy, + const ObTableSchema &index_schema, const uint64_t tenant_id, const uint64_t data_table_id, - const uint64_t index_table_id, bool &has_index_task); static int get_create_index_or_mlog_task_cnt( @@ -354,6 +385,14 @@ public: common::ObIAllocator &allocator, common::ObIArray &records); + static int check_rebuild_vec_index_task_exist( + const uint64_t tenant_id, + const uint64_t data_table_id, + const uint64_t index_table_id, + common::ObISQLClient &proxy, + common::ObIAllocator &allocator, + bool &is_exist); + private: static int fill_task_record( const uint64_t tenant_id, @@ -564,16 +603,22 @@ public: void set_sub_task_trace_id(const int32_t sub_task_trace_id) { sub_task_trace_id_ = sub_task_trace_id; } void add_event_info(const ObString &ddl_event_stmt); void add_event_info(const 
share::ObDDLTaskStatus status, const uint64_t tenant_id); + bool is_inited() const { return is_inited_; } bool try_set_running() { return !ATOMIC_CAS(&is_running_, false, true); } uint64_t get_tenant_id() const { return dst_tenant_id_; } + int64_t get_src_tenant_id() const { return tenant_id_; } uint64_t get_object_id() const { return object_id_; } int64_t get_schema_version() const { return dst_schema_version_; } + int64_t get_src_schema_version() const { return schema_version_; } uint64_t get_target_object_id() const { return target_object_id_; } int64_t get_task_status() const { return task_status_; } int64_t get_snapshot_version() const { return snapshot_version_; } int get_ddl_type_str(const int64_t ddl_type, const char *&ddl_type_str); int64_t get_ret_code() const { return ret_code_; } int64_t get_task_id() const { return task_id_; } + int64_t get_delay_schedule_time() const { return delay_schedule_time_;} + void set_delay_schedule_time(int64_t delay_schedule_time) { delay_schedule_time_ = delay_schedule_time;} + ObDDLWaitTransEndCtx* get_wait_trans_ctx() {return &wait_trans_ctx_;} ObDDLTaskID get_ddl_task_id() const { return ObDDLTaskID(dst_tenant_id_, task_id_); } ObDDLTaskKey get_task_key() const { return ObDDLTaskKey(dst_tenant_id_, target_object_id_, dst_schema_version_); } int64_t get_parent_task_id() const { return parent_task_id_; } @@ -591,6 +636,7 @@ public: virtual int serialize_params_to_message(char *buf, const int64_t buf_size, int64_t &pos) const; virtual int deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos); virtual int64_t get_serialize_param_size() const; + virtual bool is_ddl_task_can_be_cancelled() const; const ObString &get_ddl_stmt_str() const { return ddl_stmt_str_; } int set_ddl_stmt_str(const ObString &ddl_stmt_str); int convert_to_record(ObDDLTaskRecord &task_record, common::ObIAllocator &allocator); diff --git a/src/rootserver/ddl_task/ob_drop_vec_index_task.cpp 
b/src/rootserver/ddl_task/ob_drop_vec_index_task.cpp new file mode 100644 index 0000000000..423c43c994 --- /dev/null +++ b/src/rootserver/ddl_task/ob_drop_vec_index_task.cpp @@ -0,0 +1,899 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX COMMON + +#include "rootserver/ddl_task/ob_drop_vec_index_task.h" +#include "share/schema/ob_multi_version_schema_service.h" +#include "share/ob_ddl_error_message_table_operator.h" +#include "sql/engine/cmd/ob_ddl_executor_util.h" +#include "rootserver/ob_root_service.h" +#include "share/ob_ddl_sim_point.h" + +using namespace oceanbase::share; + +namespace oceanbase +{ +namespace rootserver +{ + +ObDropVecIndexTask::ObDropVecIndexTask() + : ObDDLTask(DDL_DROP_VEC_INDEX), + root_service_(nullptr), + rowkey_vid_(), + vid_rowkey_(), + domain_index_(), // delta_buffer_table + vec_index_id_(), + vec_index_snapshot_data_(), + drop_index_arg_(), + replica_builder_(), + check_dag_exit_tablets_map_(), + wait_trans_ctx_(), + delte_lob_meta_request_time_(0), + delte_lob_meta_job_ret_code_(INT64_MAX), + check_dag_exit_retry_cnt_(0), + del_lob_meta_row_task_submitted_(false), + snapshot_held_(false) +{ +} + +ObDropVecIndexTask::~ObDropVecIndexTask() +{ +} + +int ObDropVecIndexTask::init( + const uint64_t tenant_id, + const int64_t task_id, + const uint64_t data_table_id, + const ObDDLType ddl_type, + const ObVecIndexDDLChildTaskInfo &rowkey_vid, + const ObVecIndexDDLChildTaskInfo &vid_rowkey, + const ObVecIndexDDLChildTaskInfo 
&domain_index, // delta_buffer_table + const ObVecIndexDDLChildTaskInfo &vec_delta_buffer, + const ObVecIndexDDLChildTaskInfo &vec_index_snapshot_data, + const int64_t schema_version, + const int64_t consumer_group_id, + const uint64_t tenant_data_version, + const obrpc::ObDropIndexArg &drop_index_arg) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(OB_INVALID_ID == tenant_id + || task_id <= 0 + || OB_INVALID_ID == data_table_id + || !domain_index.is_valid() + || schema_version <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(tenant_id), K(task_id), K(data_table_id), K(rowkey_vid), + K(vid_rowkey), K(domain_index), K(vec_delta_buffer), K(vec_index_snapshot_data), K(schema_version)); + } else if (OB_ISNULL(root_service_ = GCTX.root_service_)) { + ret = OB_ERR_SYS; + LOG_WARN("error sys, root service is null", K(ret)); + } else if (OB_FAIL(deep_copy_index_arg(allocator_, drop_index_arg, drop_index_arg_))) { + LOG_WARN("deep copy drop index arg failed", K(ret)); + } else if (OB_FAIL(rowkey_vid_.deep_copy_from_other(rowkey_vid, allocator_))) { + LOG_WARN("fail to deep copy from other", K(ret), K(rowkey_vid)); + } else if (OB_FAIL(vid_rowkey_.deep_copy_from_other(vid_rowkey, allocator_))) { + LOG_WARN("fail to deep copy from other", K(ret), K(vid_rowkey)); + } else if (OB_FAIL(domain_index_.deep_copy_from_other(domain_index, allocator_))) { + LOG_WARN("fail to deep copy from other", K(ret), K(domain_index)); + } else if (OB_FAIL(vec_index_id_.deep_copy_from_other(vec_delta_buffer, allocator_))) { + LOG_WARN("fail to deep copy from other", K(ret), K(vec_delta_buffer)); + } else if (OB_FAIL(vec_index_snapshot_data_.deep_copy_from_other(vec_index_snapshot_data, allocator_))) { + LOG_WARN("fail to deep copy from other", K(ret), K(vec_index_snapshot_data)); + } else { + task_type_ = DDL_DROP_VEC_INDEX; + set_gmt_create(ObTimeUtility::current_time()); + tenant_id_ = tenant_id; + object_id_ = data_table_id; + target_object_id_ = 
domain_index.table_id_; + schema_version_ = schema_version; + task_id_ = task_id; + parent_task_id_ = 0; // no parent task + consumer_group_id_ = consumer_group_id; + task_version_ = OB_DROP_VEC_INDEX_TASK_VERSION; + dst_tenant_id_ = tenant_id; + dst_schema_version_ = schema_version; + is_inited_ = true; + data_format_version_ = tenant_data_version; + execution_id_ = 1L; + } + return ret; +} + +int ObDropVecIndexTask::init(const ObDDLTaskRecord &task_record) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + if (OB_UNLIKELY(!task_record.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(task_record)); + } else if (OB_ISNULL(root_service_ = GCTX.root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, root service is nullptr", K(ret)); + } else { + task_type_ = task_record.ddl_type_; + tenant_id_ = task_record.tenant_id_; + object_id_ = task_record.object_id_; + target_object_id_ = task_record.target_object_id_; + schema_version_ = task_record.schema_version_; + task_id_ = task_record.task_id_; + parent_task_id_ = task_record.parent_task_id_; + task_version_ = task_record.task_version_; + ret_code_ = task_record.ret_code_; + dst_tenant_id_ = tenant_id_; + dst_schema_version_ = schema_version_; + execution_id_ = task_record.execution_id_; + snapshot_version_ = task_record.snapshot_version_; + pos = 0; + if (OB_ISNULL(task_record.message_.ptr())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, task record message is nullptr", K(ret), K(task_record)); + } else if (OB_FAIL(deserialize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), + task_record.message_.length(), pos))) { + LOG_WARN("deserialize params from message failed", K(ret)); + } else { + is_inited_ = true; + // set up span during recover task + ddl_tracing_.open_for_recovery(); + } + } + return ret; +} + +// to hold snapshot, containing data in old table with new schema version. 
+int ObDropVecIndexTask::obtain_snapshot(const share::ObDDLTaskStatus next_task_status) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDDLRedefinitionTask has not been inited", K(ret)); + } else if ((snapshot_version_ > 0 && snapshot_held_) || !vec_index_snapshot_data_.is_valid()) { + // do nothing, already hold snapshot or do not need snapshot(when snapshot table is not built) + } else if (OB_FAIL(ObDDLUtil::obtain_snapshot(next_task_status, vec_index_snapshot_data_.table_id_, + vec_index_snapshot_data_.table_id_, snapshot_version_, + snapshot_held_, this))) { + LOG_WARN("fail to obtain_snapshot", K(ret), K(snapshot_version_), K(snapshot_held_)); + } + + return ret; +} + +int64_t ObDropVecIndexTask::get_build_replica_request_time() +{ + TCRLockGuard guard(lock_); + return delte_lob_meta_request_time_; +} + +int ObDropVecIndexTask::drop_lob_meta_row(const ObDDLTaskStatus next_task_status) +{ + int ret = OB_SUCCESS; + bool is_build_replica_end = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDropVecIndexTask is not inited", K(ret)); + } else if (ObDDLTaskStatus::DROP_LOB_META_ROW != task_status_) { + ret = OB_TASK_EXPIRED; + LOG_WARN("task status not match", K(ret), K(task_status_)); + } else if (OB_UNLIKELY(snapshot_version_ <= 0)) { + is_build_replica_end = true; // switch to fail. 
+ ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected snapshot", K(ret), KPC(this)); + } else if (vec_index_snapshot_data_.is_valid() && !del_lob_meta_row_task_submitted_ && OB_FAIL(send_build_single_replica_request())) { + LOG_WARN("fail to send build single replica request", K(ret)); + } else if (vec_index_snapshot_data_.is_valid() && del_lob_meta_row_task_submitted_ && OB_FAIL(check_build_single_replica(is_build_replica_end))) { + LOG_WARN("fail to check build single replica", K(ret), K(is_build_replica_end)); + } else if (!vec_index_snapshot_data_.is_valid()) { + is_build_replica_end = true; + } + if (is_build_replica_end) { + ret = OB_SUCC(ret) ? delte_lob_meta_job_ret_code_ : ret; + if (OB_FAIL(ret)) { + LOG_WARN("fail in delete lob meta row", K(ret)); + } else if (OB_FAIL(finish())) { + LOG_WARN("fail in release snapshot", K(ret)); + } else if (OB_FAIL(switch_status(next_task_status, true/*enable_flt*/, ret))) { + LOG_WARN("fail to swith task status", K(ret)); + } + } + return ret; +} + +int ObDropVecIndexTask::process() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDropIndexTask has not been inited", K(ret)); + } else if (!need_retry()) { + // task is done + } else if (OB_FAIL(check_switch_succ())) { + LOG_WARN("check need retry failed", K(ret)); + } else { + ddl_tracing_.restore_span_hierarchy(); + const ObDDLTaskStatus status = static_cast(task_status_); + switch (status) { + case ObDDLTaskStatus::PREPARE: + if (OB_FAIL(prepare(ObDDLTaskStatus::WAIT_TRANS_END))) { + LOG_WARN("fail to prepare", K(ret)); + } + break; + case ObDDLTaskStatus::WAIT_TRANS_END: + if (OB_FAIL(wait_trans_end(wait_trans_ctx_, ObDDLTaskStatus::OBTAIN_SNAPSHOT))) { + LOG_WARN("fail to wait trans end", K(ret)); + } + break; + case ObDDLTaskStatus::OBTAIN_SNAPSHOT: + if (OB_FAIL(obtain_snapshot(ObDDLTaskStatus::DROP_LOB_META_ROW))) { + LOG_WARN("fail to wait trans end", K(ret)); + } + break; + case ObDDLTaskStatus::DROP_LOB_META_ROW: + if 
(OB_FAIL(drop_lob_meta_row(ObDDLTaskStatus::DROP_AUX_INDEX_TABLE))) { + LOG_WARN("fail to do drop lob meta row of aux table", K(ret)); + } + break; + case ObDDLTaskStatus::DROP_AUX_INDEX_TABLE: + if (OB_FAIL(drop_aux_index_table(WAIT_CHILD_TASK_FINISH))) { + LOG_WARN("fail to prepare", K(ret)); + } + break; + case ObDDLTaskStatus::WAIT_CHILD_TASK_FINISH: + if (OB_FAIL(check_and_wait_finish(SUCCESS))) { + LOG_WARN("fail to check and wait task", K(ret)); + } + break; + case ObDDLTaskStatus::SUCCESS: + if (OB_FAIL(succ())) { + LOG_WARN("do succ procedure failed", K(ret)); + } + break; + case ObDDLTaskStatus::FAIL: + if (OB_FAIL(exit_all_dags_and_clean())) { + LOG_WARN("do fail procedure failed", K(ret)); + } + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error unexpected, task status is not valid", K(ret), K(task_status_)); + } + ddl_tracing_.release_span_hierarchy(); + } + return ret; +} + +int ObDropVecIndexTask::deep_copy_index_arg(common::ObIAllocator &allocator, + const obrpc::ObDropIndexArg &src_index_arg, + obrpc::ObDropIndexArg &dst_index_arg) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + char *buf = nullptr; + const int64_t serialize_size = src_index_arg.get_serialize_size(); + if (OB_ISNULL(buf = static_cast(allocator.alloc(serialize_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", K(ret), K(serialize_size)); + } else if (OB_FAIL(src_index_arg.serialize(buf, serialize_size, pos))) { + LOG_WARN("serialize source index arg failed", K(ret)); + } else if (OB_FALSE_IT(pos = 0)) { + } else if (OB_FAIL(dst_index_arg.deserialize(buf, serialize_size, pos))) { + LOG_WARN("deserialize failed", K(ret)); + } + if (OB_FAIL(ret) && nullptr != buf) { + allocator.free(buf); + } + + return ret; +} + +int ObDropVecIndexTask::serialize_params_to_message(char *buf, const int64_t buf_size, int64_t &pos) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(nullptr == buf || buf_size <= 0)) { + ret = OB_INVALID_ARGUMENT; + 
LOG_WARN("invalid arguments", K(ret), KP(buf), K(buf_size)); + } else if (OB_FAIL(ObDDLTask::serialize_params_to_message(buf, buf_size, pos))) { + LOG_WARN("fail to ObDDLTask::serialize", K(ret)); + } else if (OB_FAIL(rowkey_vid_.serialize(buf, buf_size, pos))) { + LOG_WARN("fail to serialize rowkey vid table info", K(ret), K(rowkey_vid_)); + } else if (OB_FAIL(vid_rowkey_.serialize(buf, buf_size, pos))) { + LOG_WARN("fail to serialize vid rowkey table info", K(ret), K(vid_rowkey_)); + } else if (OB_FAIL(domain_index_.serialize(buf, buf_size, pos))) { // delta_buffer_table + LOG_WARN("fail to serialize index id table info", K(ret), K(domain_index_)); + } else if (OB_FAIL(vec_index_id_.serialize(buf, buf_size, pos))) { + LOG_WARN("fail to serialize index delta buffer table info", K(ret), K(vec_index_id_)); + } else if (OB_FAIL(vec_index_snapshot_data_.serialize(buf, buf_size, pos))) { + LOG_WARN("fail to serialize index snapshot data table info", K(ret), K(vec_index_snapshot_data_)); + } else if (OB_FAIL(drop_index_arg_.serialize(buf, buf_size, pos))) { + LOG_WARN("serialize failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i64(buf, + buf_size, + pos, + delte_lob_meta_job_ret_code_))) { + LOG_WARN("serialize delte_lob_meta_job_ret_code failed", K(ret)); + } + return ret; +} + +int ObDropVecIndexTask::deserialize_params_from_message( + const uint64_t tenant_id, + const char *buf, + const int64_t buf_size, + int64_t &pos) +{ + int ret = OB_SUCCESS; + obrpc::ObDropIndexArg tmp_drop_index_arg; + ObVecIndexDDLChildTaskInfo tmp_info; + if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || nullptr == buf || buf_size <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(tenant_id), KP(buf), K(buf_size)); + } else if (OB_FAIL(ObDDLTask::deserialize_params_from_message(tenant_id, buf, buf_size, pos))) { + LOG_WARN("fail to ObDDLTask::deserialize", K(ret), K(tenant_id)); + } else if (OB_FAIL(tmp_info.deserialize(buf, buf_size, pos))) { + 
LOG_WARN("fail to deserialize rowkey vid table info", K(ret)); + } else if (OB_FAIL(rowkey_vid_.deep_copy_from_other(tmp_info, allocator_))) { + LOG_WARN("fail to deep copy from tmp info", K(ret), K(tmp_info)); + } else if (OB_FAIL(tmp_info.deserialize(buf, buf_size, pos))) { + LOG_WARN("fail to deserialize vid rowkey table info", K(ret)); + } else if (OB_FAIL(vid_rowkey_.deep_copy_from_other(tmp_info, allocator_))) { + LOG_WARN("fail to deep copy from tmp info", K(ret), K(tmp_info)); + } else if (OB_FAIL(tmp_info.deserialize(buf, buf_size, pos))) { + LOG_WARN("fail to deserialize index id table info", K(ret)); + } else if (OB_FAIL(domain_index_.deep_copy_from_other(tmp_info, allocator_))) { // delta_buffer_table + LOG_WARN("fail to deep copy from tmp info", K(ret), K(tmp_info)); + } else if (OB_FAIL(tmp_info.deserialize(buf, buf_size, pos))) { + LOG_WARN("fail to deserialize delta buffer table info", K(ret)); + } else if (OB_FAIL(vec_index_id_.deep_copy_from_other(tmp_info, allocator_))) { + LOG_WARN("fail to deep copy from tmp info", K(ret), K(tmp_info)); + } else if (OB_FAIL(tmp_info.deserialize(buf, buf_size, pos))) { + LOG_WARN("fail to deserialize index snapshot data table info", K(ret)); + } else if (OB_FAIL(vec_index_snapshot_data_.deep_copy_from_other(tmp_info, allocator_))) { + LOG_WARN("fail to deep copy from tmp info", K(ret), K(tmp_info)); + } else if (OB_FAIL(tmp_drop_index_arg.deserialize(buf, buf_size, pos))) { + LOG_WARN("deserialize failed", K(ret)); + } else if (OB_FAIL(deep_copy_index_arg(allocator_, tmp_drop_index_arg, drop_index_arg_))) { + LOG_WARN("deep copy drop index arg failed", K(ret)); + } else if (OB_FAIL(serialization::decode_i64(buf, + buf_size, + pos, + &delte_lob_meta_job_ret_code_))) { + LOG_WARN("fail to deserialize delte_lob_meta_job_ret_code_", K(ret)); + } + return ret; +} + +int64_t ObDropVecIndexTask::get_serialize_param_size() const +{ + return ObDDLTask::get_serialize_param_size() + + rowkey_vid_.get_serialize_size() + + 
vid_rowkey_.get_serialize_size() + + domain_index_.get_serialize_size() // delta_buffer_table + + vec_index_id_.get_serialize_size() + + vec_index_snapshot_data_.get_serialize_size() + + drop_index_arg_.get_serialize_size() + + serialization::encoded_length_i64(delte_lob_meta_job_ret_code_); +} + +int ObDropVecIndexTask::update_task_message() +{ + int ret = OB_SUCCESS; + char *buf = nullptr; + int64_t pos = 0; + ObString msg; + common::ObArenaAllocator allocator("ObVecReBuild"); + const int64_t serialize_param_size = get_serialize_param_size(); + + if (OB_ISNULL(buf = static_cast(allocator.alloc(serialize_param_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory", KR(ret), K(serialize_param_size)); + } else if (OB_FAIL(serialize_params_to_message(buf, serialize_param_size, pos))) { + LOG_WARN("failed to serialize params to message", KR(ret)); + } else { + msg.assign(buf, serialize_param_size); + if (OB_FAIL(ObDDLTaskRecordOperator::update_message(root_service_->get_sql_proxy(), tenant_id_, task_id_, msg))) { + LOG_WARN("failed to update message", KR(ret)); + } + } + return ret; +} + +int ObDropVecIndexTask::check_switch_succ() +{ + int ret = OB_SUCCESS; + ObSchemaGetterGuard schema_guard; + bool is_domain_index_exist = false; // delta_buffer_table + bool is_vid_rowkey_exist = false; + bool is_rowkey_vid_exist = false; + bool is_index_id_exist = false; + bool is_snapshot_data_exist = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("hasn't initialized", K(ret)); + } else if (OB_ISNULL(root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, root service is nullptr", K(ret), KP(root_service_)); + } else if (OB_FAIL(refresh_schema_version())) { + LOG_WARN("refresh schema version failed", K(ret)); + } else if (OB_FAIL(root_service_->get_schema_service().get_tenant_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("fail to get tenant schema", K(ret), K(tenant_id_)); + } else if 
(domain_index_.is_valid() + && OB_FAIL(schema_guard.check_table_exist(tenant_id_, domain_index_.table_id_, is_domain_index_exist))) { + LOG_WARN("fail to check table exist", K(ret), K(tenant_id_), K(domain_index_)); + } else if (vid_rowkey_.is_valid() + && OB_FAIL(schema_guard.check_table_exist(tenant_id_, vid_rowkey_.table_id_, is_vid_rowkey_exist))) { + LOG_WARN("fail to check table exist", K(ret), K(tenant_id_), K(vid_rowkey_)); + } else if (rowkey_vid_.is_valid() + && OB_FAIL(schema_guard.check_table_exist(tenant_id_, rowkey_vid_.table_id_, is_rowkey_vid_exist))) { + LOG_WARN("fail to check table exist", K(ret), K(tenant_id_), K(rowkey_vid_)); + } else if (vec_index_id_.is_valid() + && OB_FAIL(schema_guard.check_table_exist(tenant_id_, vec_index_id_.table_id_, is_index_id_exist))) { + LOG_WARN("fail to check table exist", K(ret), K(tenant_id_), K(vec_index_id_)); + } else if (vec_index_snapshot_data_.is_valid() + && OB_FAIL(schema_guard.check_table_exist(tenant_id_, vec_index_snapshot_data_.table_id_, is_snapshot_data_exist))) { + LOG_WARN("fail to check table exist", K(ret), K(tenant_id_), K(vec_index_snapshot_data_)); + } else { + is_domain_index_exist = domain_index_.is_valid() ? is_domain_index_exist : false; + is_rowkey_vid_exist = rowkey_vid_.is_valid() ? is_rowkey_vid_exist : false; + is_vid_rowkey_exist = vid_rowkey_.is_valid() ? is_vid_rowkey_exist : false; + is_index_id_exist = vec_index_id_.is_valid() ? is_index_id_exist : false; + is_snapshot_data_exist = vec_index_snapshot_data_.is_valid() ? 
is_snapshot_data_exist : false; + + if (!is_domain_index_exist && + !is_rowkey_vid_exist && + !is_vid_rowkey_exist && + !is_index_id_exist && + !is_snapshot_data_exist) { + task_status_ = ObDDLTaskStatus::SUCCESS; + } + } + return ret; +} + +/* + create drop none share vector index table task and wait task +*/ +int ObDropVecIndexTask::prepare(const share::ObDDLTaskStatus &new_status) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDDLRedefinitionTask has not been inited", K(ret)); + } + if (OB_FAIL(switch_status(new_status, true/*enable_flt*/, ret))) { + // overwrite ret + LOG_WARN("fail to switch status", K(ret)); + } + return ret; +} + +int ObDropVecIndexTask::drop_aux_index_table(const share::ObDDLTaskStatus &new_status) +{ + int ret = OB_SUCCESS; + bool has_finished = false; + ObSchemaGetterGuard schema_guard; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDropVecIndexTask has not been inited", K(ret)); + } else if (OB_ISNULL(root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, root service is nullptr", K(ret), KP(root_service_)); + } else if (OB_FAIL(root_service_->get_schema_service().get_tenant_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("fail to get tenant schema guard", K(ret), K(tenant_id_)); + } else if (0 == domain_index_.task_id_ && domain_index_.is_valid() + && OB_FAIL(create_drop_index_task(schema_guard, domain_index_.table_id_, domain_index_.index_name_, domain_index_.task_id_, true/* is_domain_index */))) { + LOG_WARN("fail to create drop index task", K(ret), K(domain_index_)); + } else if (0 == vec_index_id_.task_id_ && vec_index_id_.is_valid() + && OB_FAIL(create_drop_index_task(schema_guard, vec_index_id_.table_id_, vec_index_id_.index_name_, vec_index_id_.task_id_))) { + LOG_WARN("fail to create drop index task", K(ret), K(vec_index_id_)); + } else if (0 == vec_index_snapshot_data_.task_id_ && vec_index_snapshot_data_.is_valid() + && 
OB_FAIL(create_drop_index_task(schema_guard, vec_index_snapshot_data_.table_id_, vec_index_snapshot_data_.index_name_, vec_index_snapshot_data_.task_id_))) { + LOG_WARN("fail to create drop index task", K(ret), K(vec_index_snapshot_data_)); + } else if (OB_FAIL(update_task_message())) { + LOG_WARN("fail to update domain_index_, vec_index_id_, vec_index_snapshot_data_ to __all_ddl_task_status", K(ret)); + } else if (OB_FAIL(wait_none_share_index_child_task_finish(has_finished))) { + LOG_WARN("fail to wait vec none share child task finish", K(ret)); + } + if (has_finished) { + // overwrite return code + if (OB_FAIL(switch_status(new_status, true/*enable_flt*/, ret))) { + LOG_WARN("fail to switch status", K(ret), K(new_status)); + } else { + vec_index_snapshot_data_.table_id_ = OB_INVALID_ID; + } + } + return ret; +} + +int ObDropVecIndexTask::check_and_wait_finish(const share::ObDDLTaskStatus &new_status) +{ + int ret = OB_SUCCESS; + bool has_finished = false; + if (OB_FAIL(create_drop_share_index_task())) { + LOG_WARN("fail to create drop share index child task", K(ret)); + } else if (0 == rowkey_vid_.task_id_ && 0 == vid_rowkey_.task_id_) { + // If there are other vector indexes, there is no need to drop the rowkey vid auxiliary table. And the task + // status is set to success and skipped. 
+ has_finished = true; + } else if (OB_FAIL(wait_share_index_child_task_finish(has_finished))) { + LOG_WARN("fail to wait share index child task finish", K(ret)); + } + if (has_finished) { + // overwrite return code + if (OB_FAIL(switch_status(new_status, true/*enable_flt*/, ret))) { + LOG_WARN("fail to switch status", K(ret), K(new_status)); + } + } + return ret; +} + +int ObDropVecIndexTask::check_drop_index_finish( + const uint64_t tenant_id, + const int64_t task_id, + const int64_t table_id, + bool &has_finished) +{ + int ret = OB_SUCCESS; + const ObAddr unused_addr; + int64_t unused_user_msg_len = 0; + share::ObDDLErrorMessageTableOperator::ObBuildDDLErrorMessage error_message; + has_finished = false; + if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || task_id <= 0 || OB_INVALID_ID == table_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(tenant_id), K(task_id), K(table_id)); + } else if (OB_FAIL(share::ObDDLErrorMessageTableOperator::get_ddl_error_message( + tenant_id, + task_id, + -1/*target_object_id*/, + table_id, + *GCTX.sql_proxy_, + error_message, + unused_user_msg_len))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + LOG_INFO("ddl task not finish", K(ret), K(tenant_id), K(task_id), K(table_id)); + } else { + LOG_WARN("fail to get ddl error message", K(ret), K(tenant_id), K(task_id), K(table_id)); + } + } else { + ret = error_message.ret_code_; + has_finished = true; + } + LOG_INFO("wait build index finish", K(ret), K(tenant_id), K(task_id), K(table_id), K(has_finished)); + return ret; +} + +int ObDropVecIndexTask::wait_child_task_finish( + const common::ObIArray &child_task_ids, + bool &has_finished) +{ + int ret = OB_SUCCESS; + if (0 == child_task_ids.count()) { + has_finished = true; + } else { + bool finished = true; + for (int64_t i = 0; OB_SUCC(ret) && finished && i < child_task_ids.count(); ++i) { + const ObVecIndexDDLChildTaskInfo &task_info = child_task_ids.at(i); + finished = false; + if (-1 == 
task_info.task_id_) { + finished = true; + } else if (OB_FAIL(check_drop_index_finish(tenant_id_, task_info.task_id_, task_info.table_id_, finished))) { + LOG_WARN("fail to check vec index child task finish", K(ret)); + } else if (!finished) { // nothing to do + LOG_INFO("child task hasn't been finished", K(tenant_id_), K(task_info)); + } + } + if (OB_SUCC(ret) && finished) { + has_finished = true; + } + } + return ret; +} + +int ObDropVecIndexTask::wait_none_share_index_child_task_finish(bool &has_finished) +{ + int ret = OB_SUCCESS; + ObSEArray vec_child_tasks; + if (domain_index_.is_valid() && OB_FAIL(vec_child_tasks.push_back(domain_index_))) { // delta_buffer_table + LOG_WARN("fail to push back index id table child task", K(ret)); + } else if (vec_index_id_.is_valid() && OB_FAIL(vec_child_tasks.push_back(vec_index_id_))) { + LOG_WARN("fail to push back delta buffer table child task", K(ret)); + } else if (vec_index_snapshot_data_.is_valid() && OB_FAIL(vec_child_tasks.push_back(vec_index_snapshot_data_))) { + LOG_WARN("fail to push back index snapshot data table child task", K(ret)); + } else if (OB_FAIL(wait_child_task_finish(vec_child_tasks, has_finished))) { + LOG_WARN("fail to wait child task finish", K(ret), K(vec_child_tasks)); + } + return ret; +} + +int ObDropVecIndexTask::wait_share_index_child_task_finish(bool &has_finished) +{ + int ret = OB_SUCCESS; + ObSEArray vec_child_tasks; + if (vid_rowkey_.is_valid() && OB_FAIL(vec_child_tasks.push_back(vid_rowkey_))) { + LOG_WARN("fail to push back vid rowkey table child task", K(ret)); + } else if (rowkey_vid_.is_valid() && OB_FAIL(vec_child_tasks.push_back(rowkey_vid_))) { + LOG_WARN("fail to push back rowkey vid table child task", K(ret)); + } else if (OB_FAIL(wait_child_task_finish(vec_child_tasks, has_finished))) { + LOG_WARN("fail to wait child task finish", K(ret), K(vec_child_tasks)); + } + return ret; +} + +int ObDropVecIndexTask::create_drop_index_task( + share::schema::ObSchemaGetterGuard &guard, + 
const uint64_t index_tid, + const common::ObString &index_name, + int64_t &task_id, + const bool is_domain_index) +{ + int ret = OB_SUCCESS; + ObSqlString drop_index_sql; + const ObTableSchema *index_schema = nullptr; + const ObDatabaseSchema *database_schema = nullptr; + const ObTableSchema *data_table_schema = nullptr; + bool is_index_exist = false; + if (OB_ISNULL(root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, root service is nullptr", K(ret), KP(root_service_)); + } else if (OB_UNLIKELY(OB_INVALID_ID == index_tid || index_name.empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(index_tid), K(index_name)); + } else if (OB_FAIL(guard.check_table_exist(tenant_id_, index_tid, is_index_exist))) { + LOG_WARN("fail to check table exist", K(ret), K(tenant_id_), K(index_tid)); + } else if (!is_index_exist) { + // nothing to do, just by pass. + task_id = -1; + } else if (OB_FAIL(guard.get_table_schema(tenant_id_, index_tid, index_schema))) { + LOG_WARN("fail to get index table schema", K(ret), K(tenant_id_), K(index_tid)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, index schema is nullptr", K(ret), KP(index_schema)); + } else if (OB_FAIL(guard.get_database_schema(tenant_id_, index_schema->get_database_id(), database_schema))) { + LOG_WARN("fail to get database schema", K(ret), K(index_schema->get_database_id())); + } else if (OB_FAIL(guard.get_table_schema(tenant_id_, index_schema->get_data_table_id(), data_table_schema))) { + LOG_WARN("fail to get data table schema", K(ret), K(index_schema->get_data_table_id())); + } else if (OB_UNLIKELY(nullptr == database_schema || nullptr == data_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, schema is nullptr", K(ret), KP(database_schema), KP(data_table_schema)); + } else if (is_domain_index && OB_FAIL(drop_index_sql.assign(drop_index_arg_.ddl_stmt_str_))) { + LOG_WARN("assign user drop index 
sql failed", K(ret)); + } else { + int64_t ddl_rpc_timeout_us = 0; + obrpc::ObDropIndexArg arg; + obrpc::ObDropIndexRes res; + arg.is_inner_ = true; + arg.tenant_id_ = tenant_id_; + arg.exec_tenant_id_ = tenant_id_; + arg.index_table_id_ = index_tid; + arg.session_id_ = data_table_schema->get_session_id(); + arg.index_name_ = index_name; + arg.table_name_ = data_table_schema->get_table_name(); + arg.database_name_ = database_schema->get_database_name_str(); + arg.index_action_type_ = obrpc::ObIndexArg::DROP_INDEX; + arg.ddl_stmt_str_ = drop_index_sql.string(); + arg.is_add_to_scheduler_ = true; + arg.task_id_ = task_id_; + if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout( + index_schema->get_all_part_num() + data_table_schema->get_all_part_num(), ddl_rpc_timeout_us))) { + LOG_WARN("fail to get ddl rpc timeout", K(ret)); + } else if (OB_FAIL(root_service_->get_common_rpc_proxy().timeout(ddl_rpc_timeout_us).drop_index(arg, res))) { + LOG_WARN("fail to drop index", K(ret), K(ddl_rpc_timeout_us), K(arg), K(res.task_id_)); + } else { + task_id = res.task_id_; + } + LOG_INFO("drop index", K(ret), K(index_tid), K(index_name), K(task_id), + "data table name", data_table_schema->get_table_name_str(), + "database name", database_schema->get_database_name_str()); + } + return ret; +} + +int ObDropVecIndexTask::create_drop_share_index_task() +{ + int ret = OB_SUCCESS; + ObSchemaGetterGuard schema_guard; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, root service is nullptr", K(ret), KP(root_service_)); + } else if (OB_FAIL(root_service_->get_schema_service().get_tenant_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("fail to get tenant schema guard", K(ret), K(tenant_id_)); + } else if (0 == rowkey_vid_.task_id_ && rowkey_vid_.is_valid() + && OB_FAIL(create_drop_index_task(schema_guard, rowkey_vid_.table_id_, rowkey_vid_.index_name_, 
rowkey_vid_.task_id_))) { + LOG_WARN("fail to create drop index task", K(ret), K(rowkey_vid_)); + } else if (0 == vid_rowkey_.task_id_ && vid_rowkey_.is_valid() + && OB_FAIL(create_drop_index_task(schema_guard, vid_rowkey_.table_id_, vid_rowkey_.index_name_, vid_rowkey_.task_id_))) { + LOG_WARN("fail to create drop index task", K(ret), K(vid_rowkey_)); + } else if (OB_FAIL(update_task_message())) { + LOG_WARN("fail to update vid_rowkey_ and rowkey_vid_ to __all_ddl_task_status", K(ret)); + } + return ret; +} + +int ObDropVecIndexTask::succ() +{ + return cleanup(); +} + +int ObDropVecIndexTask::fail() +{ + return cleanup(); +} + +int ObDropVecIndexTask::check_and_cancel_del_dag(bool &all_dag_exit) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (!vec_index_snapshot_data_.is_valid() || !del_lob_meta_row_task_submitted_) { + all_dag_exit = true; + } else if (OB_FAIL(ObDDLUtil::check_and_cancel_single_replica_dag(this, vec_index_snapshot_data_.table_id_, + vec_index_snapshot_data_.table_id_, check_dag_exit_tablets_map_, check_dag_exit_retry_cnt_, false/*is_complement_data_dag*/, all_dag_exit))) { + LOG_WARN("fail to check and cancel delete lob mete row dag", K(ret), K(vec_index_snapshot_data_)); + } + return ret; +} + +int ObDropVecIndexTask::release_snapshot(const int64_t snapshot_version) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (!vec_index_snapshot_data_.is_valid()) { + // do nothing + } else if (OB_FAIL(ObDDLUtil::release_snapshot(this, vec_index_snapshot_data_.table_id_, vec_index_snapshot_data_.table_id_, snapshot_version))) { + LOG_WARN("release snapshot failed", K(ret)); + } + return ret; +} + +int ObDropVecIndexTask::finish() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDDLRedefinitionTask has not been inited", K(ret)); + } else if (snapshot_version_ 
> 0 && OB_FAIL(release_snapshot(snapshot_version_))) { + LOG_WARN("release snapshot failed", K(ret)); + } + return ret; +} + +int ObDropVecIndexTask::exit_all_dags_and_clean() +{ + int ret = OB_SUCCESS; + bool all_delete_lob_meta_row_dag_exit = true; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDDLRedefinitionTask has not been inited", K(ret)); + } else if (OB_FAIL(check_and_cancel_del_dag(all_delete_lob_meta_row_dag_exit))) { + LOG_WARN("check and cancel delete lob meta row data dag failed", K(ret)); + } else if (!all_delete_lob_meta_row_dag_exit) { + if (REACH_COUNT_INTERVAL(1000L)) { + LOG_INFO("wait all delete lob meta row data dag exit", K(dst_tenant_id_), K(task_id_)); + } + } else if (OB_FAIL(finish())) { + LOG_WARN("finish tans failed", K(ret)); + } else if (OB_FAIL(cleanup())) { + LOG_WARN("cleanup failed", K(ret)); + } + return ret; +} + +int ObDropVecIndexTask::cleanup_impl() +{ + int ret = OB_SUCCESS; + ObString unused_str; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(report_error_code(unused_str))) { + LOG_WARN("report error code failed", K(ret)); + } else if (OB_FAIL(ObDDLTaskRecordOperator::delete_record(root_service_->get_sql_proxy(), tenant_id_, task_id_))) { + LOG_WARN("delete task record failed", K(ret), K(task_id_), K(schema_version_)); + } else { + need_retry_ = false; // clean succ, stop the task + } + LOG_INFO("clean task finished", K(ret), K(*this)); + return ret; +} + +int ObDropVecIndexTask::send_build_single_replica_request() +{ + int ret = OB_SUCCESS; + uint64_t tenant_data_format_version = 0; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObColumnRedefinitionTask has not been inited", K(ret)); + } else { + ObDDLSingleReplicaExecutorParam param; + param.tenant_id_ = tenant_id_; + param.dest_tenant_id_ = dst_tenant_id_; + param.type_ = task_type_; + param.source_table_id_ = vec_index_snapshot_data_.table_id_; + param.dest_table_id_ = 
target_object_id_; + param.schema_version_ = schema_version_; + param.dest_schema_version_ = dst_schema_version_; + param.snapshot_version_ = snapshot_version_; // should > 0, but = 0 + param.task_id_ = task_id_; + param.parallelism_ = std::max(parallelism_, 1L); + param.execution_id_ = execution_id_; // should >= 0 + param.data_format_version_ = data_format_version_; // should > 0 + param.consumer_group_id_ = consumer_group_id_; + + if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id_, vec_index_snapshot_data_.table_id_, param.source_tablet_ids_))) { + LOG_WARN("fail to get tablets", K(ret), K(tenant_id_), K(object_id_)); + } else if (OB_FAIL(ObDDLUtil::get_tablets(dst_tenant_id_, vec_index_snapshot_data_.table_id_, param.dest_tablet_ids_))) { + LOG_WARN("fail to get tablets", K(ret), K(tenant_id_), K(target_object_id_)); + } else if (OB_FAIL(replica_builder_.build(param))) { + LOG_WARN("fail to send build single replica", K(ret)); + } else { + del_lob_meta_row_task_submitted_ = true; + delte_lob_meta_request_time_ = ObTimeUtility::current_time(); + } + } + return ret; +} + +// check whether all leaders have completed the task +int ObDropVecIndexTask::check_build_single_replica(bool &is_end) +{ + int ret = OB_SUCCESS; + is_end = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDDLRedefinitionTask has not been inited", K(ret)); + } else if (OB_FAIL(replica_builder_.check_build_end(false/*do not need check sum*/,is_end, delte_lob_meta_job_ret_code_))) { + LOG_WARN("fail to check build end", K(ret)); + } else if (!is_end) { + if (delte_lob_meta_request_time_ + ObDDLUtil::calc_inner_sql_execute_timeout() < ObTimeUtility::current_time()) { // timeout, retry + del_lob_meta_row_task_submitted_ = false; + delte_lob_meta_request_time_ = 0; + } + } + return ret; +} + +// update sstable complement status for all leaders +int ObDropVecIndexTask::update_drop_lob_meta_row_job_status(const common::ObTabletID &tablet_id, + const int64_t snapshot_version, + 
const int64_t execution_id, + const int ret_code, + const ObDDLTaskInfo &addition_info) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDropVecIndexTask has not been inited", K(ret)); + } else if (ObDDLTaskStatus::DROP_LOB_META_ROW != task_status_) { + // by pass, may be network delay + } else if (snapshot_version != snapshot_version_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("snapshot version not match", K(ret), K(snapshot_version), K(snapshot_version_)); + } else if (execution_id < execution_id_) { + LOG_INFO("receive a mismatch execution result, ignore", K(ret_code), K(execution_id), K(execution_id_)); + } else if (OB_FAIL(replica_builder_.set_partition_task_status(tablet_id, + ret_code, + addition_info.row_scanned_, + addition_info.row_inserted_))) { + LOG_WARN("fail to set partition task status", K(ret)); + } + return ret; +} + +} // end namespace rootserver +} // end namespace oceanbase diff --git a/src/rootserver/ddl_task/ob_drop_vec_index_task.h b/src/rootserver/ddl_task/ob_drop_vec_index_task.h new file mode 100644 index 0000000000..403e1b14a8 --- /dev/null +++ b/src/rootserver/ddl_task/ob_drop_vec_index_task.h @@ -0,0 +1,126 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_ROOTSERVER_OB_DROP_VEC_INDEX_TASK_H +#define OCEANBASE_ROOTSERVER_OB_DROP_VEC_INDEX_TASK_H + +#include "rootserver/ddl_task/ob_drop_index_task.h" + +namespace oceanbase +{ +namespace rootserver +{ + +class ObDropVecIndexTask : public ObDDLTask +{ +public: + ObDropVecIndexTask(); + virtual ~ObDropVecIndexTask(); + + int init( + const uint64_t tenant_id, + const int64_t task_id, + const uint64_t data_table_id, + const share::ObDDLType ddl_type, + const ObVecIndexDDLChildTaskInfo &rowkey_vid, + const ObVecIndexDDLChildTaskInfo &vid_rowkey, + const ObVecIndexDDLChildTaskInfo &domain_index, + const ObVecIndexDDLChildTaskInfo &vec_delta_buffer, + const ObVecIndexDDLChildTaskInfo &vec_index_snapshot_data, + const int64_t schema_version, + const int64_t consumer_group_id, + const uint64_t tenant_data_version, + const obrpc::ObDropIndexArg &drop_index_arg); + int init(const ObDDLTaskRecord &task_record); + virtual int process() override; + virtual int serialize_params_to_message( + char *buf, + const int64_t buf_size, + int64_t &pos) const override; + virtual int deserialize_params_from_message( + const uint64_t tenant_id, + const char *buf, + const int64_t buf_size, + int64_t &pos) override; + virtual int64_t get_serialize_param_size() const override; + virtual int on_child_task_finish(const uint64_t child_task_key, const int ret_code) override { return OB_SUCCESS; } + int64_t get_build_replica_request_time(); + int reap_old_replica_build_task(bool &need_exec_new_inner_sql); + int update_drop_lob_meta_row_job_status(const common::ObTabletID &tablet_id, + const int64_t snapshot_version, + const int64_t execution_id, + const int ret_code, + const ObDDLTaskInfo &addition_info); + + INHERIT_TO_STRING_KV("ObDDLTask", ObDDLTask, K_(rowkey_vid), K_(vid_rowkey), K_(domain_index), K_(vec_index_id), + K_(vec_index_snapshot_data), K(wait_trans_ctx_), K(snapshot_held_)); +private: + static const int64_t OB_DROP_VEC_INDEX_TASK_VERSION = 1; + int 
deep_copy_index_arg(common::ObIAllocator &allocator, + const obrpc::ObDropIndexArg &src_index_arg, + obrpc::ObDropIndexArg &dst_index_arg); + int check_switch_succ(); + int prepare(const share::ObDDLTaskStatus &status); + int check_and_wait_finish(const share::ObDDLTaskStatus &status); + int release_snapshot(const int64_t snapshot_version); + int obtain_snapshot(const share::ObDDLTaskStatus next_task_status); + int drop_aux_index_table(const share::ObDDLTaskStatus &status); + int drop_lob_meta_row(const share::ObDDLTaskStatus next_task_status); + int check_and_cancel_del_dag(bool &all_dag_exit); + int exit_all_dags_and_clean(); + int finish(); + int check_drop_index_finish( + const uint64_t tenant_id, + const int64_t task_id, + const int64_t table_id, + bool &has_finished); + int wait_child_task_finish( + const common::ObIArray &child_task_ids, + bool &has_finished); + int wait_none_share_index_child_task_finish(bool &has_finished); + int wait_share_index_child_task_finish(bool &has_finished); + int create_drop_index_task( + share::schema::ObSchemaGetterGuard &guard, + const uint64_t index_tid, + const common::ObString &index_name, + int64_t &task_id, + const bool is_domain_index = false); + int create_drop_share_index_task(); + int update_task_message(); + int succ(); + int fail(); + int send_build_single_replica_request(); + int check_build_single_replica(bool &is_end); + virtual int cleanup_impl() override; + +private: + ObRootService *root_service_; + ObVecIndexDDLChildTaskInfo rowkey_vid_; + ObVecIndexDDLChildTaskInfo vid_rowkey_; + ObVecIndexDDLChildTaskInfo domain_index_; + ObVecIndexDDLChildTaskInfo vec_index_id_; + ObVecIndexDDLChildTaskInfo vec_index_snapshot_data_; + obrpc::ObDropIndexArg drop_index_arg_; + ObDDLSingleReplicaExecutor replica_builder_; + common::hash::ObHashMap check_dag_exit_tablets_map_; // for delete lob meta row data ddl only. 
+ ObDDLWaitTransEndCtx wait_trans_ctx_; + int64_t delte_lob_meta_request_time_; + int64_t delte_lob_meta_job_ret_code_; + int64_t check_dag_exit_retry_cnt_; + bool del_lob_meta_row_task_submitted_; + bool snapshot_held_; +}; + +} // end namespace rootserver +} // end namespace oceanbase + +#endif // OCEANBASE_ROOTSERVER_OB_DROP_domain_INDEX_TASK_H diff --git a/src/rootserver/ddl_task/ob_fts_index_build_task.cpp b/src/rootserver/ddl_task/ob_fts_index_build_task.cpp index 56195498a4..0ccc11e93b 100644 --- a/src/rootserver/ddl_task/ob_fts_index_build_task.cpp +++ b/src/rootserver/ddl_task/ob_fts_index_build_task.cpp @@ -35,14 +35,14 @@ ObFtsIndexBuildTask::ObFtsIndexBuildTask() doc_rowkey_aux_table_id_(OB_INVALID_ID), fts_index_aux_table_id_(OB_INVALID_ID), fts_doc_word_aux_table_id_(OB_INVALID_ID), - rowkey_doc_schema_generated_(false), - doc_rowkey_schema_generated_(false), - fts_index_aux_schema_generated_(false), - fts_doc_word_schema_generated_(false), rowkey_doc_task_submitted_(false), doc_rowkey_task_submitted_(false), fts_index_aux_task_submitted_(false), fts_doc_word_task_submitted_(false), + rowkey_doc_task_id_(0), + doc_rowkey_task_id_(0), + fts_index_aux_task_id_(0), + fts_doc_word_task_id_(0), drop_index_task_id_(0), drop_index_task_submitted_(false), root_service_(nullptr), @@ -120,7 +120,6 @@ int ObFtsIndexBuildTask::init( create_index_arg_.exec_tenant_id_ = tenant_id; fts_index_aux_table_id_ = index_table_id_; // fts_index aux schema already generated before ddl task begin - fts_index_aux_schema_generated_ = true; task_version_ = OB_FTS_INDEX_BUILD_TASK_VERSION; start_time_ = ObTimeUtility::current_time(); data_format_version_ = tenant_data_format_version; @@ -191,7 +190,6 @@ int ObFtsIndexBuildTask::init(const ObDDLTaskRecord &task_record) target_object_id_ = index_table_id; index_table_id_ = index_table_id; fts_index_aux_table_id_ = index_table_id_; - fts_index_aux_schema_generated_ = true; execution_id_ = task_record.execution_id_; ret_code_ = 
task_record.ret_code_; start_time_ = ObTimeUtility::current_time(); @@ -522,71 +520,56 @@ int ObFtsIndexBuildTask::prepare() return ret; } +// patch from fts2 branch +int ObFtsIndexBuildTask::prepare_aux_table( + const ObIndexType index_type, + bool &task_submitted, + uint64_t &aux_table_id, + int64_t &res_task_id) +{ + int ret = OB_SUCCESS; + const int64_t num_fts_child_task = 4; + SMART_VAR(obrpc::ObCreateIndexArg, index_arg) { + if (OB_FAIL(construct_create_index_arg(index_type, index_arg))) { + LOG_WARN("failed to construct rowkey doc id arg", K(ret)); + } else if (OB_FAIL(ObDomainIndexBuilderUtil::prepare_aux_table(task_submitted, + aux_table_id, + res_task_id, + lock_, + object_id_, + tenant_id_, + task_id_, + index_arg, + root_service_, + dependent_task_result_map_, + obrpc::ObRpcProxy::myaddr_, + num_fts_child_task))) { + LOG_WARN("fail to prepare_aux_table", K(ret), K(index_type)); + } + } // samart var + return ret; +} + int ObFtsIndexBuildTask::prepare_rowkey_doc_table() { int ret = OB_SUCCESS; bool state_finished = false; - const int64_t num_fts_child_task = 4; + const ObIndexType index_type = ObIndexType::INDEX_TYPE_ROWKEY_DOC_ID_LOCAL; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); } else if (ObDDLTaskStatus::GENERATE_ROWKEY_DOC_SCHEMA != task_status_) { ret = OB_STATE_NOT_MATCH; LOG_WARN("task status not match", K(ret), K(task_status_)); - } else if (!dependent_task_result_map_.created() && - OB_FAIL(dependent_task_result_map_.create(num_fts_child_task, - lib::ObLabel("DepTasMap")))) { - LOG_WARN("create dependent task map failed", K(ret)); - } else { - const uint64_t data_table_id = object_id_; - int64_t ddl_rpc_timeout = 0; - SMART_VARS_4((obrpc::ObCreateIndexArg, rowkey_doc_arg), - (ObDDLTaskRecord, rowkey_doc_task_record), - (obrpc::ObGenerateAuxIndexSchemaArg, arg), - (obrpc::ObGenerateAuxIndexSchemaRes, res)) { - ObDDLService &ddl_service = root_service_->get_ddl_service(); - arg.tenant_id_ = tenant_id_; - 
arg.exec_tenant_id_ = tenant_id_; - arg.data_table_id_ = data_table_id; - arg.task_id_ = task_id_; - obrpc::ObCommonRpcProxy *common_rpc = nullptr; - if (OB_FAIL(construct_rowkey_doc_arg(rowkey_doc_arg))) { - LOG_WARN("failed to construct rowkey doc id arg", K(ret)); - } else if (!rowkey_doc_schema_generated_) { - if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout(tenant_id_, - data_table_id, - ddl_rpc_timeout))) { - LOG_WARN("get ddl rpc timeout fail", K(ret)); - } else if (OB_ISNULL(root_service_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("root_service is nullptr", K(ret)); - } else if (OB_FAIL(arg.create_index_arg_.assign(rowkey_doc_arg))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to assign create index arg", K(ret)); - } else if (OB_FALSE_IT(common_rpc = root_service_->get_ddl_service().get_common_rpc())) { - } else if (OB_ISNULL(common_rpc)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("common rpc is nullptr", K(ret)); - } else if (OB_FAIL(common_rpc-> to(obrpc::ObRpcProxy::myaddr_). - timeout(ddl_rpc_timeout).generate_aux_index_schema(arg, - res))) { - LOG_WARN("generate fts aux index schema failed", K(ret), K(arg)); - } else if (res.schema_generated_) { - rowkey_doc_schema_generated_ = true; - rowkey_doc_aux_table_id_ = res.aux_table_id_; - } - } - if (OB_FAIL(ret)) { - } else if (!rowkey_doc_schema_generated_ ) { - } else if (rowkey_doc_task_submitted_) { - } else if (OB_FAIL(submit_build_aux_index_task(rowkey_doc_arg, - rowkey_doc_task_record, - rowkey_doc_task_submitted_))) { - LOG_WARN("fail to submit build rowkey doc id index task", K(ret)); - } else { - state_finished = true; - } - } + } else if (OB_FAIL(prepare_aux_table(index_type, + rowkey_doc_task_submitted_, + rowkey_doc_aux_table_id_, + rowkey_doc_task_id_))) { + LOG_WARN("failed to prepare aux table", K(ret), K(index_type), + K(rowkey_doc_task_submitted_), K(rowkey_doc_aux_table_id_)); + } + if (OB_SUCC(ret) && rowkey_doc_task_submitted_) { + state_finished = true; } if (state_finished) { 
ObDDLTaskStatus next_status; @@ -605,117 +588,38 @@ int ObFtsIndexBuildTask::prepare_aux_index_tables() { int ret = OB_SUCCESS; bool state_finished = false; - const int64_t num_fts_child_task = 4; + const ObIndexType doc_rowkey_type = ObIndexType::INDEX_TYPE_DOC_ID_ROWKEY_LOCAL; + const ObIndexType fts_index_aux_type = ObIndexType::INDEX_TYPE_FTS_INDEX_LOCAL; + const ObIndexType fts_doc_word_type = ObIndexType::INDEX_TYPE_FTS_DOC_WORD_LOCAL; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); } else if (ObDDLTaskStatus::GENERATE_DOC_AUX_SCHEMA != task_status_) { ret = OB_STATE_NOT_MATCH; LOG_WARN("task status not match", K(ret), K(task_status_)); - } else if (!dependent_task_result_map_.created() && - OB_FAIL(dependent_task_result_map_.create(num_fts_child_task, - lib::ObLabel("DepTasMap")))) { - LOG_WARN("create dependent task map failed", K(ret)); - } else { - const uint64_t data_table_id = object_id_; - int64_t ddl_rpc_timeout = 0; - SMART_VARS_3((obrpc::ObCreateIndexArg, doc_rowkey_arg), - (obrpc::ObCreateIndexArg, fts_index_aux_arg), - (obrpc::ObCreateIndexArg, fts_doc_word_arg)) { - SMART_VARS_3((ObDDLTaskRecord, doc_rowkey_task_record), - (ObDDLTaskRecord, fts_index_aux_task_record), - (ObDDLTaskRecord, fts_doc_word_task_record)) { - SMART_VARS_4((obrpc::ObGenerateAuxIndexSchemaArg, doc_rowkey_schema_arg), - (obrpc::ObGenerateAuxIndexSchemaRes, doc_rowkey_schema_res), - (obrpc::ObGenerateAuxIndexSchemaArg, fts_doc_word_schema_arg), - (obrpc::ObGenerateAuxIndexSchemaRes, fts_doc_word_schema_res)) { - ObDDLService &ddl_service = root_service_->get_ddl_service(); - doc_rowkey_schema_arg.tenant_id_ = tenant_id_; - doc_rowkey_schema_arg.data_table_id_ = data_table_id; - doc_rowkey_schema_arg.exec_tenant_id_ = tenant_id_; - doc_rowkey_schema_arg.task_id_ = task_id_; - fts_doc_word_schema_arg.tenant_id_ = tenant_id_; - fts_doc_word_schema_arg.data_table_id_ = data_table_id; - fts_doc_word_schema_arg.exec_tenant_id_ = tenant_id_; - 
fts_doc_word_schema_arg.task_id_ = task_id_; - obrpc::ObCommonRpcProxy *common_rpc = nullptr; - if (OB_ISNULL(root_service_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("root_service is nullptr", K(ret)); - } else if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout(tenant_id_, - data_table_id, - ddl_rpc_timeout))) { - LOG_WARN("get ddl rpc timeout fail", K(ret)); - } else if (OB_FAIL(construct_doc_rowkey_arg(doc_rowkey_arg))) { - LOG_WARN("fail to construct doc id rowkey arg", K(ret)); - } else if (!doc_rowkey_schema_generated_) { - if (OB_FAIL(doc_rowkey_schema_arg.create_index_arg_.assign(doc_rowkey_arg))) { - LOG_WARN("fail to assign create index arg", K(ret)); - } else if (OB_FALSE_IT(common_rpc = root_service_->get_ddl_service().get_common_rpc())) { - } else if (OB_ISNULL(common_rpc)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("common rpc is nullptr", K(ret)); - } else if (OB_FAIL(common_rpc->to(obrpc::ObRpcProxy::myaddr_). - timeout(ddl_rpc_timeout). - generate_aux_index_schema(doc_rowkey_schema_arg, - doc_rowkey_schema_res))) { - LOG_WARN("generate fts doc rowkey schema failed", K(ret), K(doc_rowkey_schema_arg)); - } else if (doc_rowkey_schema_res.schema_generated_) { - doc_rowkey_schema_generated_ = true; - doc_rowkey_aux_table_id_ = doc_rowkey_schema_res.aux_table_id_; - } - } - if (OB_FAIL(ret)) { - } else if (!doc_rowkey_schema_generated_) { - } else if (doc_rowkey_task_submitted_) { - } else if (OB_FAIL(submit_build_aux_index_task(doc_rowkey_arg, - doc_rowkey_task_record, - doc_rowkey_task_submitted_))) { - LOG_WARN("fail to submit build doc id rowkey index task", K(ret)); - } - // NOTE unlike other 3 aux index schemas which require rpc to generate schema, - // fts index schema is generated before this ddl task start - if (OB_FAIL(ret)) { - } else if (OB_FAIL(construct_fts_index_aux_arg(fts_index_aux_arg))) { - LOG_WARN("fail to construct fts index aux arg", K(ret)); - } else if (fts_index_aux_task_submitted_) { - } else if 
(OB_FAIL(submit_build_aux_index_task(fts_index_aux_arg, - fts_index_aux_task_record, - fts_index_aux_task_submitted_))) { - LOG_WARN("fail to submit build fts index aux task", K(ret)); - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(construct_fts_doc_word_arg(fts_doc_word_arg))) { - LOG_WARN("fail to construct fts doc word arg", K(ret)); - } else if (!fts_doc_word_schema_generated_) { - if (OB_FAIL(fts_doc_word_schema_arg.create_index_arg_.assign(fts_doc_word_arg))) { - LOG_WARN("fail to assign create index arg", K(ret)); - } else if (OB_FAIL(common_rpc-> to(obrpc::ObRpcProxy::myaddr_). - timeout(ddl_rpc_timeout). - generate_aux_index_schema(fts_doc_word_schema_arg, - fts_doc_word_schema_res))) { - LOG_WARN("generate fts doc word schema failed", K(ret), K(fts_doc_word_schema_arg)); - } else if (fts_doc_word_schema_res.schema_generated_) { - fts_doc_word_schema_generated_ = true; - fts_doc_word_aux_table_id_ = fts_doc_word_schema_res.aux_table_id_; - } - } - if (OB_FAIL(ret)) { - } else if (!fts_doc_word_schema_generated_) { - } else if (fts_doc_word_task_submitted_) { - } else if (OB_FAIL(submit_build_aux_index_task(fts_doc_word_arg, - fts_doc_word_task_record, - fts_doc_word_task_submitted_))) { - LOG_WARN("fail to submit build fts doc word index task", K(ret)); - } - if (doc_rowkey_task_submitted_ && fts_index_aux_task_submitted_ && - fts_doc_word_task_submitted_) { - state_finished = true; - } - } - } // SMART_VARS - } // SMART_VARS - } // SMART_VARS + } else if (OB_FAIL(prepare_aux_table(doc_rowkey_type, + doc_rowkey_task_submitted_, + doc_rowkey_aux_table_id_, + doc_rowkey_task_id_))) { + LOG_WARN("failed to prepare aux table", K(ret), + K(doc_rowkey_task_submitted_), K(doc_rowkey_aux_table_id_)); + } else if (OB_FAIL(prepare_aux_table(fts_index_aux_type, + fts_index_aux_task_submitted_, + fts_index_aux_table_id_, + fts_index_aux_task_id_))) { + LOG_WARN("failed to prepare aux table", K(ret), + K(fts_index_aux_task_submitted_), K(fts_index_aux_table_id_)); + } 
else if (OB_FAIL(prepare_aux_table(fts_doc_word_type, + fts_doc_word_task_submitted_, + fts_doc_word_aux_table_id_, + fts_doc_word_task_id_))) { + LOG_WARN("failed to prepare aux table", K(ret), + K(fts_doc_word_task_submitted_), K(fts_doc_word_aux_table_id_)); + } + if (OB_SUCC(ret) && doc_rowkey_task_submitted_ && + fts_index_aux_task_submitted_ && fts_doc_word_task_submitted_) { + state_finished = true; + } if (state_finished) { ObDDLTaskStatus next_status; if (OB_FAIL(get_next_status(next_status))) { @@ -729,6 +633,34 @@ int ObFtsIndexBuildTask::prepare_aux_index_tables() return ret; } +int ObFtsIndexBuildTask::construct_create_index_arg( + const ObIndexType index_type, + obrpc::ObCreateIndexArg &arg) +{ + int ret = OB_SUCCESS; + if (share::schema::is_rowkey_doc_aux(index_type)) { + if (OB_FAIL(construct_rowkey_doc_arg(arg))) { + LOG_WARN("failed to construct rowkey doc arg", K(ret)); + } + } else if (share::schema::is_doc_rowkey_aux(index_type)) { + if (OB_FAIL(construct_doc_rowkey_arg(arg))) { + LOG_WARN("failed to construct doc rowkey arg", K(ret)); + } + } else if (share::schema::is_fts_index_aux(index_type)) { + if (OB_FAIL(construct_fts_index_aux_arg(arg))) { + LOG_WARN("failed to construct fts index aux arg", K(ret)); + } + } else if (share::schema::is_fts_doc_word_aux(index_type)) { + if (OB_FAIL(construct_fts_doc_word_arg(arg))) { + LOG_WARN("failed to construct fts doc word arg", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("undexpected index type", K(ret), K(index_type)); + } + return ret; +} + int ObFtsIndexBuildTask::construct_rowkey_doc_arg(obrpc::ObCreateIndexArg &arg) { int ret = OB_SUCCESS; @@ -820,7 +752,7 @@ int ObFtsIndexBuildTask::get_index_table_id( // wait data complement of aux index tables int ObFtsIndexBuildTask::wait_aux_table_complement() { - using task_iter = common::hash::ObHashMap::const_iterator; + using task_iter = common::hash::ObHashMap::const_iterator; int ret = OB_SUCCESS; bool child_task_failed = false; 
bool state_finished = false; @@ -961,7 +893,7 @@ int ObFtsIndexBuildTask::submit_build_aux_index_task( LOG_WARN("fail to schedule ddl task", K(ret), K(task_record)); } else { TCWLockGuard guard(lock_); - DependTaskStatus status; + share::ObDomainDependTaskStatus status; // check if child task is already added if (OB_FAIL(dependent_task_result_map_.get_refactored(index_table_id, status))) { @@ -1002,7 +934,7 @@ int ObFtsIndexBuildTask::on_child_task_finish( } else { TCWLockGuard guard(lock_); int64_t org_ret = INT64_MAX; - DependTaskStatus status; + share::ObDomainDependTaskStatus status; if (OB_FAIL(dependent_task_result_map_.get_refactored(child_task_key, status))) { if (OB_HASH_NOT_EXIST == ret) { @@ -1092,10 +1024,6 @@ int ObFtsIndexBuildTask::serialize_params_to_message( int64_t &pos) const { int ret = OB_SUCCESS; - int8_t rowkey_doc_generated = static_cast(rowkey_doc_schema_generated_); - int8_t doc_rowkey_generated = static_cast(doc_rowkey_schema_generated_); - int8_t fts_index_aux_generated = static_cast(fts_index_aux_schema_generated_); - int8_t fts_doc_word_generated = static_cast(fts_doc_word_schema_generated_); int8_t rowkey_doc_submitted = static_cast(rowkey_doc_task_submitted_); int8_t doc_rowkey_submitted = static_cast(doc_rowkey_task_submitted_); int8_t fts_index_aux_submitted = static_cast(fts_index_aux_task_submitted_); @@ -1128,26 +1056,6 @@ int ObFtsIndexBuildTask::serialize_params_to_message( pos, fts_doc_word_aux_table_id_))) { LOG_WARN("serialize fts doc word table id failed", K(ret)); - } else if (OB_FAIL(serialization::encode_i8(buf, - buf_len, - pos, - rowkey_doc_generated))) { - LOG_WARN("serialize rowkey doc schema generated failed", K(ret)); - } else if (OB_FAIL(serialization::encode_i8(buf, - buf_len, - pos, - doc_rowkey_generated))) { - LOG_WARN("serialize doc rowkey schema generated failed", K(ret)); - } else if (OB_FAIL(serialization::encode_i8(buf, - buf_len, - pos, - fts_index_aux_generated))) { - LOG_WARN("serialize fts index aux 
schema generated failed", K(ret)); - } else if (OB_FAIL(serialization::encode_i8(buf, - buf_len, - pos, - fts_doc_word_generated))) { - LOG_WARN("serialize fts doc word schema generated failed", K(ret)); } else if (OB_FAIL(serialization::encode_i8(buf, buf_len, pos, @@ -1168,6 +1076,26 @@ int ObFtsIndexBuildTask::serialize_params_to_message( pos, fts_doc_word_submitted))) { LOG_WARN("serialize fts doc word task submitted failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i64(buf, + buf_len, + pos, + rowkey_doc_task_id_))) { + LOG_WARN("serialize rowkey doc task id failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i64(buf, + buf_len, + pos, + doc_rowkey_task_id_))) { + LOG_WARN("serialize doc rowkey task id failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i64(buf, + buf_len, + pos, + fts_index_aux_task_id_))) { + LOG_WARN("serialize fts index aux task id failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i64(buf, + buf_len, + pos, + fts_doc_word_task_id_))) { + LOG_WARN("serialize fts doc word task id failed", K(ret)); } else if (OB_FAIL(serialization::encode_i8(buf, buf_len, pos, @@ -1189,10 +1117,7 @@ int ObFtsIndexBuildTask::deserialize_params_from_message( int64_t &pos) { int ret = OB_SUCCESS; - int8_t rowkey_doc_generated = 0; - int8_t doc_rowkey_generated = 0; - int8_t fts_index_aux_generated = 0; - int8_t fts_doc_word_generated = 0; + const int64_t num_fts_child_task = 4; int8_t rowkey_doc_submitted = 0; int8_t doc_rowkey_submitted = 0; int8_t fts_index_aux_submitted = 0; @@ -1233,26 +1158,6 @@ int ObFtsIndexBuildTask::deserialize_params_from_message( pos, fts_doc_word_aux_table_id_))) { LOG_WARN("fail to deserialize fts doc word table id", K(ret)); - } else if (OB_FAIL(serialization::decode_i8(buf, - data_len, - pos, - &rowkey_doc_generated))) { - LOG_WARN("fail to deserialize rowkey doc schema generated", K(ret)); - } else if (OB_FAIL(serialization::decode_i8(buf, - data_len, - pos, - &doc_rowkey_generated))) { - 
LOG_WARN("fail to deserialize doc rowkey schema generated", K(ret)); - } else if (OB_FAIL(serialization::decode_i8(buf, - data_len, - pos, - &fts_index_aux_generated))) { - LOG_WARN("fail to deserialize fts index aux schema generated", K(ret)); - } else if (OB_FAIL(serialization::decode_i8(buf, - data_len, - pos, - &fts_doc_word_generated))) { - LOG_WARN("fail to deserialize fts doc word schema generated", K(ret)); } else if (OB_FAIL(serialization::decode_i8(buf, data_len, pos, @@ -1273,6 +1178,26 @@ int ObFtsIndexBuildTask::deserialize_params_from_message( pos, &fts_doc_word_submitted))) { LOG_WARN("fail to deserialize fts doc word task submmitted", K(ret)); + } else if (OB_FAIL(serialization::decode_i64(buf, + data_len, + pos, + &rowkey_doc_task_id_))) { + LOG_WARN("fail to deserialize rowkey doc task id", K(ret)); + } else if (OB_FAIL(serialization::decode_i64(buf, + data_len, + pos, + &doc_rowkey_task_id_))) { + LOG_WARN("fail to deserialize doc rowkey task id", K(ret)); + } else if (OB_FAIL(serialization::decode_i64(buf, + data_len, + pos, + &fts_index_aux_task_id_))) { + LOG_WARN("fail to deserialize fts index aux task id", K(ret)); + } else if (OB_FAIL(serialization::decode_i64(buf, + data_len, + pos, + &fts_doc_word_task_id_))) { + LOG_WARN("fail to deserialize fts doc word task id", K(ret)); } else if (OB_FAIL(serialization::decode_i8(buf, data_len, pos, @@ -1283,11 +1208,47 @@ int ObFtsIndexBuildTask::deserialize_params_from_message( pos, &drop_index_task_id_))) { LOG_WARN("fail to deserialize drop fts index task id", K(ret)); + } else if (!dependent_task_result_map_.created() && + OB_FAIL(dependent_task_result_map_.create(num_fts_child_task, + lib::ObLabel("DepTasMap")))) { + LOG_WARN("create dependent task map failed", K(ret)); } else { - rowkey_doc_schema_generated_ = rowkey_doc_generated; - doc_rowkey_schema_generated_ = doc_rowkey_generated; - fts_index_aux_schema_generated_ = fts_index_aux_generated; - fts_doc_word_schema_generated_ = 
fts_doc_word_generated; + if (OB_SUCC(ret) && rowkey_doc_task_id_ != 0) { + share::ObDomainDependTaskStatus rowkey_doc_status; + rowkey_doc_status.task_id_ = rowkey_doc_task_id_; + if (OB_FAIL(dependent_task_result_map_.set_refactored(rowkey_doc_aux_table_id_, + rowkey_doc_status))) { + LOG_WARN("set dependent task map failed", K(ret), K(rowkey_doc_aux_table_id_), + K(rowkey_doc_status)); + } + } + if (OB_SUCC(ret) && doc_rowkey_task_id_ != 0) { + share::ObDomainDependTaskStatus doc_rowkey_status; + doc_rowkey_status.task_id_ = doc_rowkey_task_id_; + if (OB_FAIL(dependent_task_result_map_.set_refactored(doc_rowkey_aux_table_id_, + doc_rowkey_status))) { + LOG_WARN("set dependent task map failed", K(ret), K(doc_rowkey_aux_table_id_), + K(doc_rowkey_status)); + } + } + if (OB_SUCC(ret) && fts_index_aux_task_id_ != 0) { + share::ObDomainDependTaskStatus fts_index_aux_status; + fts_index_aux_status.task_id_ = fts_index_aux_task_id_; + if (OB_FAIL(dependent_task_result_map_.set_refactored(fts_index_aux_table_id_, + fts_index_aux_status))) { + LOG_WARN("set dependent task map failed", K(ret), K(fts_index_aux_table_id_), + K(fts_index_aux_status)); + } + } + if (OB_SUCC(ret) && fts_doc_word_task_id_ != 0) { + share::ObDomainDependTaskStatus fts_doc_word_status; + fts_doc_word_status.task_id_ = fts_doc_word_task_id_; + if (OB_FAIL(dependent_task_result_map_.set_refactored(fts_doc_word_aux_table_id_, + fts_doc_word_status))) { + LOG_WARN("set dependent task map failed", K(ret), K(fts_doc_word_aux_table_id_), + K(fts_doc_word_status)); + } + } rowkey_doc_task_submitted_ = rowkey_doc_submitted; doc_rowkey_task_submitted_ = doc_rowkey_submitted; fts_index_aux_task_submitted_ = fts_index_aux_submitted; @@ -1299,10 +1260,6 @@ int ObFtsIndexBuildTask::deserialize_params_from_message( int64_t ObFtsIndexBuildTask::get_serialize_param_size() const { - int8_t rowkey_doc_generated = static_cast(rowkey_doc_schema_generated_); - int8_t doc_rowkey_generated = 
static_cast(doc_rowkey_schema_generated_); - int8_t fts_index_aux_generated = static_cast(fts_index_aux_schema_generated_); - int8_t fts_doc_word_generated = static_cast(fts_doc_word_schema_generated_); int8_t rowkey_doc_submitted = static_cast(rowkey_doc_task_submitted_); int8_t doc_rowkey_submitted = static_cast(doc_rowkey_task_submitted_); int8_t fts_index_aux_submitted = static_cast(fts_index_aux_task_submitted_); @@ -1314,21 +1271,21 @@ int64_t ObFtsIndexBuildTask::get_serialize_param_size() const + serialization::encoded_length(doc_rowkey_aux_table_id_) + serialization::encoded_length(fts_index_aux_table_id_) + serialization::encoded_length(fts_doc_word_aux_table_id_) - + serialization::encoded_length_i8(rowkey_doc_generated) - + serialization::encoded_length_i8(doc_rowkey_generated) - + serialization::encoded_length_i8(fts_index_aux_generated) - + serialization::encoded_length_i8(fts_doc_word_generated) + serialization::encoded_length_i8(rowkey_doc_submitted) + serialization::encoded_length_i8(doc_rowkey_submitted) + serialization::encoded_length_i8(fts_index_aux_submitted) + serialization::encoded_length_i8(fts_doc_word_submitted) + + serialization::encoded_length_i64(rowkey_doc_task_id_) + + serialization::encoded_length_i64(doc_rowkey_task_id_) + + serialization::encoded_length_i64(fts_index_aux_task_id_) + + serialization::encoded_length_i64(fts_doc_word_task_id_) + serialization::encoded_length_i8(drop_index_submitted) + serialization::encoded_length_i64(drop_index_task_id_); } int ObFtsIndexBuildTask::clean_on_failed() { - using task_iter = common::hash::ObHashMap::const_iterator; + using task_iter = common::hash::ObHashMap::const_iterator; int ret = OB_SUCCESS; bool state_finished = false; if (OB_UNLIKELY(!is_inited_)) { @@ -1459,38 +1416,45 @@ int ObFtsIndexBuildTask::submit_drop_fts_index_task() LOG_WARN("fail to get table schema", K(ret), K(fts_doc_word_aux_table_id_)); } } - ObDDLTaskRecord task_record; - ObCreateDDLTaskParam param(tenant_id_, - 
ObDDLType::DDL_DROP_FTS_INDEX, - fts_index_aux_schema, - nullptr, - 0/*object_id*/, - fts_index_aux_schema->get_schema_version(), - parallelism_, - consumer_group_id_, - &allocator_); - param.tenant_data_version_ = data_format_version_; - param.aux_rowkey_doc_schema_ = rowkey_doc_schema; - param.aux_doc_rowkey_schema_ = doc_rowkey_schema; - param.aux_doc_word_schema_ = fts_doc_word_schema; - if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler(). - create_ddl_task(param, *GCTX.sql_proxy_, task_record))) { - if (OB_ENTRY_EXIST == ret) { - ret = OB_SUCCESS; - } else { - LOG_WARN("submit drop fts index ddl task failed", K(ret)); - } - } else if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler(). - schedule_ddl_task(task_record))) { - LOG_WARN("fail to schedule ddl task", K(ret), K(task_record)); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(fts_index_aux_schema)) { + // TODO hanxuan fix create drop fts index when fts index schema is not generated + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fts index aux schema is nullptr, fail to roll back", K(ret)); } else { - if (OB_SUCC(ret)) { - drop_index_task_submitted_ = true; - drop_index_task_id_ = task_record.task_id_; - LOG_INFO("add drop fts index task", K(ret), K(fts_index_aux_table_id_), - K(rowkey_doc_aux_table_id_), K(doc_rowkey_aux_table_id_), - K(fts_doc_word_aux_table_id_), K(create_index_arg_.index_name_), - K(fts_index_aux_schema->get_schema_version()), K(param.schema_version_)); + ObDDLTaskRecord task_record; + ObCreateDDLTaskParam param(tenant_id_, + ObDDLType::DDL_DROP_FTS_INDEX, + fts_index_aux_schema, + nullptr, + 0/*object_id*/, + fts_index_aux_schema->get_schema_version(), + parallelism_, + consumer_group_id_, + &allocator_); + param.tenant_data_version_ = data_format_version_; + param.aux_rowkey_doc_schema_ = rowkey_doc_schema; + param.aux_doc_rowkey_schema_ = doc_rowkey_schema; + param.aux_doc_word_schema_ = fts_doc_word_schema; + if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler(). 
+ create_ddl_task(param, *GCTX.sql_proxy_, task_record))) { + if (OB_ENTRY_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("submit drop fts index ddl task failed", K(ret)); + } + } else if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler(). + schedule_ddl_task(task_record))) { + LOG_WARN("fail to schedule ddl task", K(ret), K(task_record)); + } else { + if (OB_SUCC(ret)) { + drop_index_task_submitted_ = true; + drop_index_task_id_ = task_record.task_id_; + LOG_INFO("add drop fts index task", K(ret), K(fts_index_aux_table_id_), + K(rowkey_doc_aux_table_id_), K(doc_rowkey_aux_table_id_), + K(fts_doc_word_aux_table_id_), K(create_index_arg_.index_name_), + K(fts_index_aux_schema->get_schema_version()), K(param.schema_version_)); + } } } } @@ -1596,7 +1560,6 @@ int ObFtsIndexBuildTask::cleanup_impl() ObMultiVersionSchemaService &schema_service = root_service_->get_schema_service(); ObSchemaGetterGuard schema_guard; const ObTableSchema *data_schema = nullptr; - const ObTableSchema *index_schema = nullptr; int64_t refreshed_schema_version = 0; ObTableLockOwnerID owner_id; ObMySQLTransaction trans; @@ -1607,20 +1570,17 @@ int ObFtsIndexBuildTask::cleanup_impl() data_table_id, data_schema))) { LOG_WARN("fail to get table schema", K(ret), K(data_table_id)); - } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, - index_table_id, - index_schema))) { - LOG_WARN("fail to get table schema", K(ret), K(index_table_id)); - } else if (OB_ISNULL(data_schema) || OB_ISNULL(index_schema)) { + } else if (OB_ISNULL(data_schema)) { ret = OB_TABLE_NOT_EXIST; - LOG_WARN("fail to get table schema", K(ret), KPC(data_schema), KPC(index_schema)); + LOG_WARN("fail to get table schema", K(ret), KPC(data_schema)); } else if (OB_FAIL(trans.start(&root_service_->get_sql_proxy(), dst_tenant_id_))) { LOG_WARN("start transaction failed", K(ret)); } else if (OB_FAIL(owner_id.convert_from_value(ObLockOwnerType::DEFAULT_OWNER_TYPE, task_id_))) { LOG_WARN("failed to get owner id", 
K(ret), K(task_id_)); } else if (OB_FAIL(ObDDLLock::unlock_for_add_drop_index(*data_schema, - *index_schema, + index_table_id, + false, owner_id, trans))) { LOG_WARN("failed to unlock online ddl lock", K(ret)); diff --git a/src/rootserver/ddl_task/ob_fts_index_build_task.h b/src/rootserver/ddl_task/ob_fts_index_build_task.h index 254ecb0777..b68c5a9f7c 100644 --- a/src/rootserver/ddl_task/ob_fts_index_build_task.h +++ b/src/rootserver/ddl_task/ob_fts_index_build_task.h @@ -13,7 +13,7 @@ #ifndef OCEANBASE_ROOTSERVER_OB_FTS_INDEX_BUILD_TASK_H_ #define OCEANBASE_ROOTSERVER_OB_FTS_INDEX_BUILD_TASK_H_ -#include "rootserver/ddl_task/ob_ddl_task.h" +#include "share/ob_domain_index_builder_util.h" namespace oceanbase { @@ -58,18 +58,26 @@ public: const int ret_code) override; TO_STRING_KV(K(index_table_id_), K(rowkey_doc_aux_table_id_), K(doc_rowkey_aux_table_id_), K(fts_index_aux_table_id_), - K(fts_doc_word_aux_table_id_), K(rowkey_doc_schema_generated_), - K(doc_rowkey_schema_generated_), K(fts_index_aux_schema_generated_), - K(fts_doc_word_schema_generated_), K(rowkey_doc_task_submitted_), + K(fts_doc_word_aux_table_id_), K(rowkey_doc_task_submitted_), K(doc_rowkey_task_submitted_), K(fts_index_aux_task_submitted_), - K(fts_doc_word_task_submitted_), K(drop_index_task_id_), + K(fts_doc_word_task_submitted_), K(rowkey_doc_task_id_), + K(doc_rowkey_task_id_), K(fts_index_aux_task_id_), + K(fts_doc_word_task_id_), K(drop_index_task_id_), K(drop_index_task_submitted_), K(schema_version_), K(execution_id_), K(consumer_group_id_), K(trace_id_), K(parallelism_), K(create_index_arg_)); private: int get_next_status(share::ObDDLTaskStatus &next_status); + int prepare_aux_table( + const ObIndexType index_type, + bool &task_submitted, + uint64_t &aux_table_id, + int64_t &task_id); int prepare_rowkey_doc_table(); int prepare_aux_index_tables(); + int construct_create_index_arg( + const ObIndexType index_type, + obrpc::ObCreateIndexArg &arg); int 
construct_rowkey_doc_arg(obrpc::ObCreateIndexArg &arg); int construct_doc_rowkey_arg(obrpc::ObCreateIndexArg &arg); int construct_fts_index_aux_arg(obrpc::ObCreateIndexArg &arg); @@ -102,18 +110,6 @@ private: obrpc::ObCreateIndexArg &dest_arg); private: - struct DependTaskStatus final - { - public: - DependTaskStatus() - : ret_code_(INT64_MAX), task_id_(0) - {} - ~DependTaskStatus() = default; - TO_STRING_KV(K_(task_id), K_(ret_code)); - public: - int64_t ret_code_; - int64_t task_id_; - }; static const int64_t OB_FTS_INDEX_BUILD_TASK_VERSION = 1; using ObDDLTask::tenant_id_; using ObDDLTask::task_id_; @@ -131,19 +127,19 @@ private: uint64_t doc_rowkey_aux_table_id_; uint64_t fts_index_aux_table_id_; uint64_t fts_doc_word_aux_table_id_; - bool rowkey_doc_schema_generated_; - bool doc_rowkey_schema_generated_; - bool fts_index_aux_schema_generated_; - bool fts_doc_word_schema_generated_; bool rowkey_doc_task_submitted_; bool doc_rowkey_task_submitted_; bool fts_index_aux_task_submitted_; bool fts_doc_word_task_submitted_; + int64_t rowkey_doc_task_id_; + int64_t doc_rowkey_task_id_; + int64_t fts_index_aux_task_id_; + int64_t fts_doc_word_task_id_; int64_t drop_index_task_id_; bool drop_index_task_submitted_; ObRootService *root_service_; obrpc::ObCreateIndexArg create_index_arg_; - common::hash::ObHashMap dependent_task_result_map_; + common::hash::ObHashMap dependent_task_result_map_; }; } // end namespace rootserver diff --git a/src/rootserver/ddl_task/ob_index_build_task.cpp b/src/rootserver/ddl_task/ob_index_build_task.cpp index f96504cc69..06c70e87f0 100755 --- a/src/rootserver/ddl_task/ob_index_build_task.cpp +++ b/src/rootserver/ddl_task/ob_index_build_task.cpp @@ -1007,7 +1007,7 @@ bool ObIndexBuildTask::is_create_partitioned_local_index() int ObIndexBuildTask::wait_data_complement() { int ret = OB_SUCCESS; - // temporary bypass data complement for fts index + // temporary bypass data complement for fts index if 
(share::schema::is_fts_index(create_index_arg_.index_type_)) { (void)switch_status(ObDDLTaskStatus::VALIDATE_CHECKSUM, true, ret); LOG_INFO("wait data complement finished", K(ret), K(*this)); @@ -1052,7 +1052,7 @@ int ObIndexBuildTask::wait_data_complement() state_finished = true; } } - if (OB_SUCC(ret) && state_finished && !create_index_arg_.is_spatial_index()) { + if (OB_SUCC(ret) && state_finished && !create_index_arg_.is_spatial_index() && !create_index_arg_.is_vec_index()) { bool dummy_equal = false; bool need_verify_checksum = true; #ifdef ERRSIM @@ -1113,7 +1113,7 @@ int ObIndexBuildTask::wait_local_index_data_complement() state_finished = true; } } - if (OB_SUCC(ret) && state_finished && !create_index_arg_.is_spatial_index()) { + if (OB_SUCC(ret) && state_finished && !create_index_arg_.is_spatial_index() && !create_index_arg_.is_vec_index()) { bool dummy_equal = false; if (OB_FAIL(ObDDLChecksumOperator::check_column_checksum_without_execution_id( tenant_id_, object_id_, index_table_id_, task_id_, false/*index build*/, dummy_equal, root_service_->get_sql_proxy()))) { @@ -1173,7 +1173,7 @@ int ObIndexBuildTask::check_need_verify_checksum(bool &need_verify) LOG_WARN("not init", K(ret)); } else if (is_unique_index_) { need_verify = true; - } else if (create_index_arg_.is_spatial_index()) { + } else if (create_index_arg_.is_spatial_index() || create_index_arg_.is_vec_index()) { need_verify = false; } else { ObSchemaGetterGuard schema_guard; @@ -1483,7 +1483,12 @@ int ObIndexBuildTask::update_index_status_in_schema(const ObTableSchema &index_s if (INDEX_STATUS_AVAILABLE == new_status) { // For create index syntax, create_index_arg_ will record the user sql, and generate the ddl_stmt_str when nabling index. // For alter table add index syntax, create_index_arg_ will not record the user sql, and generate the ddl_stmt_str when generating index schema. 
- arg.ddl_stmt_str_ = create_index_arg_.ddl_stmt_str_; + if (index_schema.is_vec_index() && !index_schema.is_vec_delta_buffer_type()) { + // do nothing + // For create vec index, just record one ddl_stmt_str in the delta buf table which the user can see + } else { + arg.ddl_stmt_str_ = create_index_arg_.ddl_stmt_str_; + } } DEBUG_SYNC(BEFORE_UPDATE_GLOBAL_INDEX_STATUS); diff --git a/src/rootserver/ddl_task/ob_rebuild_index_task.cpp b/src/rootserver/ddl_task/ob_rebuild_index_task.cpp new file mode 100644 index 0000000000..192337ef80 --- /dev/null +++ b/src/rootserver/ddl_task/ob_rebuild_index_task.cpp @@ -0,0 +1,754 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details.
+ */ + +#define USING_LOG_PREFIX RS + +#include "ob_rebuild_index_task.h" +#include "share/schema/ob_multi_version_schema_service.h" +#include "share/ob_ddl_error_message_table_operator.h" +#include "share/ob_ddl_sim_point.h" +#include "rootserver/ob_root_service.h" +#include "lib/timezone/ob_timezone_info.h" // for ObTimeZoneInfoWrap +#include "observer/omt/ob_tenant_timezone_mgr.h" // for OTTZ_MGR + +using namespace oceanbase::rootserver; +using namespace oceanbase::common; +using namespace oceanbase::common::sqlclient; +using namespace oceanbase::obrpc; +using namespace oceanbase::share; +using namespace oceanbase::share::schema; +using namespace oceanbase::sql; + +ObRebuildIndexTask::ObRebuildIndexTask() + : ObDDLTask(DDL_REBUILD_INDEX), rebuild_index_arg_(), index_build_task_id_(-1), index_drop_task_id_(-1), new_index_id_(OB_INVALID_ID) +{ +} + +ObRebuildIndexTask::~ObRebuildIndexTask() +{ +} + +int ObRebuildIndexTask::init( + const uint64_t tenant_id, + const int64_t task_id, + const share::ObDDLType &ddl_type, + const uint64_t data_table_id, + const uint64_t index_table_id, // delta_buffer_table table id + const int64_t schema_version, + const int64_t parent_task_id, + const int64_t consumer_group_id, + const int32_t sub_task_trace_id, + const int64_t parallelism, + const uint64_t tenant_data_version, + const obrpc::ObRebuildIndexArg &rebuild_index_arg) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || task_id <= 0 || OB_INVALID_ID == data_table_id + || OB_INVALID_ID == index_table_id || schema_version <= 0 || parent_task_id < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", KR(ret), K(tenant_id), K(task_id), K(data_table_id), + K(index_table_id), K(schema_version), K(parent_task_id)); + } else if (OB_ISNULL(root_service_ = GCTX.root_service_)) { + ret = OB_ERR_SYS; + LOG_WARN("error sys, root service is null", KR(ret)); + } else if (OB_FAIL(deep_copy_index_arg(allocator_, rebuild_index_arg, rebuild_index_arg_))) { 
+ LOG_WARN("deep copy drop index arg failed", KR(ret)); + } else { + tenant_id_ = tenant_id; + object_id_ = data_table_id; + target_object_id_ = index_table_id; + schema_version_ = schema_version; + task_id_ = task_id; + task_type_ = ddl_type; + parent_task_id_ = parent_task_id; + consumer_group_id_ = consumer_group_id; + sub_task_trace_id_ = sub_task_trace_id; + task_version_ = OB_REBUILD_INDEX_TASK_VERSION; + dst_tenant_id_ = tenant_id_; + dst_schema_version_ = schema_version_; + data_format_version_ = tenant_data_version; + parallelism_ = parallelism; + is_inited_ = true; + ddl_tracing_.open(); + } + return ret; +} + +int ObRebuildIndexTask::init( + const ObDDLTaskRecord &task_record) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + if (OB_UNLIKELY(!task_record.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", KR(ret), K(task_record)); + } else if (OB_ISNULL(root_service_ = GCTX.root_service_)) { + ret = OB_ERR_SYS; + LOG_WARN("error sys, root service is null", KR(ret)); + } else { + tenant_id_ = task_record.tenant_id_; + object_id_ = task_record.object_id_; + target_object_id_ = task_record.target_object_id_; + schema_version_ = task_record.schema_version_; + task_id_ = task_record.task_id_; + parent_task_id_ = task_record.parent_task_id_; + task_version_ = task_record.task_version_; + ret_code_ = task_record.ret_code_; + dst_tenant_id_ = tenant_id_; + dst_schema_version_ = schema_version_; + task_type_ = task_record.ddl_type_; + if (nullptr != task_record.message_.ptr()) { + int64_t pos = 0; + if (OB_FAIL(deserialize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { + LOG_WARN("deserialize params from message failed", KR(ret)); + } + } + if (OB_FAIL(ret)) { + } else { + is_inited_ = true; + // set up span during recover task + ddl_tracing_.open_for_recovery(); + } + } + return ret; +} + +bool ObRebuildIndexTask::is_valid() const +{ + return is_inited_ && 
!trace_id_.is_invalid(); +} + +int ObRebuildIndexTask::prepare(const ObDDLTaskStatus new_status) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObRebuildIndexTask has not been inited", KR(ret)); + } else if (ObDDLTaskStatus::PREPARE != task_status_) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("task status not match", K(ret), K(task_status_)); + } else if (OB_FAIL(switch_status(new_status, true, ret))) { + LOG_WARN("switch status failed", KR(ret)); + } + return ret; +} + +/* + Drop new index table on failure, drop old index table on success. +*/ +int ObRebuildIndexTask::drop_index_impl(const bool is_drop_old_index) +{ + int ret = OB_SUCCESS; + ObSchemaGetterGuard schema_guard; + const ObDatabaseSchema *database_schema = nullptr; + const ObTableSchema *data_table_schema = nullptr; + ObSqlString drop_index_sql; + ObString index_name; + const ObTableSchema *index_schema = nullptr; + uint64_t drop_index_id = is_drop_old_index ? target_object_id_ : new_index_id_; + if (OB_ISNULL(root_service_)) { + ret = OB_ERR_SYS; + LOG_WARN("error sys, root_service is nullptr", KR(ret)); + } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance(). 
+ get_tenant_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("get tenant schema failed", KR(ret), K(tenant_id_)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, drop_index_id, index_schema))) { + LOG_WARN("get index schema failed", KR(ret), K(drop_index_id)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("index schema is null", KR(ret), K(drop_index_id)); + } else if (OB_FAIL(index_schema->get_index_name(index_name))) { + LOG_WARN("get index name failed", KR(ret), K(index_schema->get_table_type()), KPC(index_schema)); + } else if (OB_FAIL(schema_guard.get_database_schema(tenant_id_, index_schema->get_database_id(), database_schema))) { + LOG_WARN("get database schema failed", KR(ret), K(index_schema->get_database_id())); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, index_schema->get_data_table_id(), data_table_schema))) { + LOG_WARN("get data table schema failed", KR(ret), K(index_schema->get_data_table_id())); + } else if (OB_UNLIKELY(nullptr == database_schema || nullptr == data_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null schema", KR(ret), KP(database_schema), KP(data_table_schema)); + } else { + // we set the drop_index_arg.index_name_ as index_name is following the reason: + // 1. In the success process, the index table and the new index table have already swapped names. + // At this point, the index_name of the old index that needs to be deleted should be the new index name. + // 2. In the failure process, the new table needs to be deleted, and the new table and the old table have not swapped names. + // At this point, the index_name is also the old table name. 
+ int64_t ddl_rpc_timeout = 0; + obrpc::ObDropIndexArg drop_index_arg; + obrpc::ObDropIndexRes drop_index_res; + drop_index_arg.is_inner_ = true; // send to rs and set is_inner_ is true to submit drop vec index ddl task。RS need get all assistant index table to drop + drop_index_arg.tenant_id_ = tenant_id_; + drop_index_arg.exec_tenant_id_ = tenant_id_; + drop_index_arg.index_table_id_ = drop_index_id; // The ID of table 3 in the vector index needs to be deleted. + drop_index_arg.session_id_ = data_table_schema->get_session_id(); + drop_index_arg.index_name_ = index_name; + drop_index_arg.table_name_ = data_table_schema->get_table_name(); + drop_index_arg.database_name_ = database_schema->get_database_name_str(); + drop_index_arg.index_action_type_ = obrpc::ObIndexArg::DROP_INDEX; + drop_index_arg.is_add_to_scheduler_ = true; + drop_index_arg.task_id_ = task_id_; + drop_index_arg.is_vec_inner_drop_ = true; + if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout(index_schema->get_all_part_num() + data_table_schema->get_all_part_num(), ddl_rpc_timeout))) { + LOG_WARN("failed to get ddl rpc timeout", KR(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DROP_INDEX_RPC_FAILED))) { + LOG_WARN("ddl sim failure", KR(ret), K(tenant_id_), K(task_id_)); + } else if (OB_FAIL(root_service_->get_common_rpc_proxy().timeout(ddl_rpc_timeout).drop_index(drop_index_arg, drop_index_res))) { + LOG_WARN("drop index failed", KR(ret), K(ddl_rpc_timeout)); + } else { + index_drop_task_id_ = drop_index_res.task_id_; + if (OB_FAIL(update_task_message())) { + LOG_WARN("fail to update index_drop_task_id_ to __all_ddl_task_status"); + } + LOG_INFO("success to submit drop vector index task", K(ret), K(index_drop_task_id_)); + } + } + return ret; +} + +int ObRebuildIndexTask::rebuild_index_impl() +{ + int ret = OB_SUCCESS; + ObSchemaGetterGuard schema_guard; + bool is_db_in_recyclebin = false; + const ObTableSchema *table_schema = nullptr; + const ObTableSchema *index_schema = nullptr; + if 
(OB_ISNULL(root_service_)) { + ret = OB_ERR_SYS; + LOG_WARN("error sys, root_service is nullptr", KR(ret)); + } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance(). + get_tenant_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("get tenant schema failed", KR(ret), K(tenant_id_)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, target_object_id_, index_schema))) { + LOG_WARN("get index schema failed", KR(ret), K(target_object_id_)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("index schema is null", KR(ret), K(target_object_id_)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, index_schema->get_data_table_id(), table_schema))) { + LOG_WARN("get data table schema failed", KR(ret), K(index_schema->get_data_table_id())); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null schema", KR(ret), KP(table_schema)); + } else if (table_schema->is_in_recyclebin()) { + ret = OB_ERR_OPERATION_ON_RECYCLE_OBJECT; + LOG_WARN("can not create index of table in recyclebin.", KR(ret), K(table_schema)); + } else if (OB_FAIL(schema_guard.check_database_in_recyclebin(tenant_id_, + table_schema->get_database_id(), + is_db_in_recyclebin))) { + LOG_WARN("check database in recyclebin failed", KR(ret), K(tenant_id_)); + } else if (is_db_in_recyclebin) { + ret = OB_ERR_OPERATION_ON_RECYCLE_OBJECT; + LOG_WARN("Can not truncate index of db in recyclebin", KR(ret)); + } else { + // parameters description: + // 1. create_index_arg.index_name_ + // The naming convention for creating the new table: assuming new index name ='idx1', the name of the new table 3 will be __idx_{datatable_id}_idx1. + // 2. create_index_arg.index_table_id_ + // The ID of the old table 3 is needed to find the old table schema when creating the index, in order to assign the schema to the new table. 
+ SMART_VAR(obrpc::ObCreateIndexArg, create_index_arg) { + obrpc::ObAlterTableRes res; + int64_t ddl_rpc_timeout = 0; + ObRootService *root_service = GCTX.root_service_; + create_index_arg.index_type_ = index_schema->get_index_type(); + create_index_arg.index_name_ = rebuild_index_arg_.index_name_; // new index name was generated at ddl_service of rebuild_vec_index func + create_index_arg.index_table_id_ = target_object_id_; // old table 3 index ID; + create_index_arg.database_name_ = rebuild_index_arg_.database_name_; + create_index_arg.is_rebuild_index_ = true; + create_index_arg.tenant_id_ = tenant_id_; + create_index_arg.exec_tenant_id_ = tenant_id_; + create_index_arg.table_name_ = table_schema->get_table_name(); + create_index_arg.index_action_type_ = obrpc::ObIndexArg::ADD_INDEX; + create_index_arg.parallelism_ = parallelism_; + create_index_arg.is_inner_ = true; // is ddl task inner task + ObColumnSortItem empty_item; + create_index_arg.index_using_type_ = USING_BTREE; + create_index_arg.index_columns_.push_back(empty_item); + create_index_arg.index_option_.block_size_ = 1; + create_index_arg.index_option_.index_status_ = INDEX_STATUS_UNAVAILABLE; + create_index_arg.index_option_.progressive_merge_num_ = 1; + + if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout(tenant_id_, target_object_id_, ddl_rpc_timeout))) { + LOG_WARN("get ddl rpc timeout failed", K(ret)); + } else if (OB_FAIL(root_service_->get_common_rpc_proxy().timeout(ddl_rpc_timeout).create_index(create_index_arg, res))) { + LOG_WARN("fail to create vec index", K(ret), K(create_index_arg)); + } else { + index_build_task_id_ = res.task_id_; // create vector index task ID + new_index_id_ = res.index_table_id_; // new table 3 index ID + LOG_INFO("success to create rebuild index task", K(ret), K(index_build_task_id_), K(new_index_id_), K(create_index_arg)); + if (OB_FAIL(update_task_message())) { + LOG_WARN("fail to update index_build_task_id to __all_ddl_task_status"); + } + } + } + } + return ret; +} + +int 
ObRebuildIndexTask::create_and_wait_rebuild_task_finish(const ObDDLTaskStatus new_status) +{ + int ret = OB_SUCCESS; + bool state_finished = false; + DEBUG_SYNC(REBUILD_VEC_INDEX_WAIT_CREATE_NEW_INDEX); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", KR(ret)); + } else if (ObDDLTaskStatus::REBUILD_SCHEMA != task_status_) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("task status not match", KR(ret), K(task_status_)); + } else if (-1 == index_build_task_id_ && OB_FAIL(rebuild_index_impl())) { + LOG_WARN("send drop index rpc failed", KR(ret)); + } else if (OB_FAIL(check_ddl_task_finish(tenant_id_, index_build_task_id_, state_finished))) { + LOG_WARN("check ddl task finish failed", K(ret), K(index_build_task_id_)); + } + if (state_finished || OB_FAIL(ret)) { + (void)switch_status(new_status, true, ret); + LOG_INFO("rebuild_index_task wait_child_task_finish finished", KR(ret), K(*this)); + } + + return ret; +} + +int ObRebuildIndexTask::update_task_message() +{ + int ret = OB_SUCCESS; + char *buf = nullptr; + int64_t pos = 0; + ObString msg; + common::ObArenaAllocator allocator("ObVecReBuild"); + const int64_t serialize_param_size = get_serialize_param_size(); + + if (OB_ISNULL(buf = static_cast(allocator.alloc(serialize_param_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory", KR(ret), K(serialize_param_size)); + } else if (OB_FAIL(serialize_params_to_message(buf, serialize_param_size, pos))) { + LOG_WARN("failed to serialize params to message", KR(ret)); + } else { + msg.assign(buf, serialize_param_size); + if (OB_FAIL(ObDDLTaskRecordOperator::update_message(root_service_->get_sql_proxy(), tenant_id_, task_id_, msg))) { + LOG_WARN("failed to update message", KR(ret)); + } + } + return ret; +} + +int ObRebuildIndexTask::get_new_index_table_id( + ObSchemaGetterGuard &schema_guard, + const int64_t tenant_id, + const int64_t database_id, + const int64_t data_table_id, + const ObString &index_name, + int64_t &index_id) +{ + 
int ret = OB_SUCCESS; + + char full_index_name_buf[OB_MAX_TABLE_NAME_LENGTH]; + const ObTableSchema *new_index_schema = nullptr; + const bool is_index = true; + ObString new_index_name; + int64_t pos = 0; + if (index_name.empty() || tenant_id == OB_INVALID_ID || + data_table_id == OB_INVALID_ID || database_id == OB_INVALID_ID) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid argument", + K(ret), K(index_name), K(tenant_id), K(data_table_id), K(database_id)); + } else if (OB_FAIL(databuff_printf(full_index_name_buf, + OB_MAX_TABLE_NAME_LENGTH, + pos, + "__idx_%lu_%.*s", + data_table_id, + index_name.length(), + index_name.ptr()))) { + LOG_WARN("fail to printf current time", K(ret)); + } else if (OB_FALSE_IT(new_index_name.assign_ptr(full_index_name_buf, + static_cast(pos)))) { + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, + database_id, + new_index_name, + is_index, + new_index_schema))) { + LOG_WARN("fail to get table schema", K(ret), K(new_index_name)); + } else if (OB_ISNULL(new_index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), K(tenant_id), K(database_id), K(new_index_name)); + } else if (!new_index_schema->is_vec_index()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected, here should be vector index schema", K(ret), K(new_index_schema)); + } else if (new_index_schema->is_unavailable_index()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected of new index status is unaveliable", KR(ret)); + } else { + index_id = new_index_schema->get_table_id(); + } + return ret; +} + +int ObRebuildIndexTask::switch_index_name(const ObDDLTaskStatus next_task_status) +{ + int ret = OB_SUCCESS; + bool state_finished = false; + ObSchemaGetterGuard schema_guard; + const ObTableSchema *index_schema = nullptr; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", KR(ret)); + } else if (ObDDLTaskStatus::SWITCH_INDEX_NAME != task_status_) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("task status not match", KR(ret), 
K(task_status_)); + } else if (OB_ISNULL(root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret)); + } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance(). + get_tenant_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("get tenant schema failed", KR(ret), K(tenant_id_)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, target_object_id_, index_schema))) { + LOG_WARN("get old index schema failed", KR(ret), K(target_object_id_)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("index schema is null", KR(ret), K(target_object_id_)); + } else { + int64_t rpc_timeout = 0; + ObRootService *root_service = GCTX.root_service_; + ObSchemaGetterGuard schema_guard; + const ObTableSchema *table_schema = nullptr; + ObDDLTaskStatus new_status = next_task_status; + ObSArray unused_ids; + const ObString origin_database_name = rebuild_index_arg_.database_name_; + const ObString origin_table_name = index_schema->get_table_name(); + ObTZMapWrap tz_map_wrap; + + SMART_VAR(obrpc::ObAlterTableArg, alter_table_arg) { + alter_table_arg.alter_table_schema_.set_tenant_id(tenant_id_); + alter_table_arg.alter_table_schema_.set_origin_database_name(origin_database_name); + alter_table_arg.alter_table_schema_.set_origin_table_name(origin_table_name); + alter_table_arg.ddl_task_type_ = share::SWITCH_VEC_INDEX_NAME_TASK; + alter_table_arg.table_id_ = target_object_id_; // Old index id, the id of the old table number 3. + alter_table_arg.hidden_table_id_ = new_index_id_; // New index id, the id of the new table number 3, obtained after rebuilding the index. 
+ alter_table_arg.task_id_ = task_id_; // rebuild index task id + alter_table_arg.tz_info_wrap_.set_tz_info_offset(0); + alter_table_arg.nls_formats_[ObNLSFormatEnum::NLS_DATE] = ObTimeConverter::COMPAT_OLD_NLS_DATE_FORMAT; + alter_table_arg.nls_formats_[ObNLSFormatEnum::NLS_TIMESTAMP] = ObTimeConverter::COMPAT_OLD_NLS_TIMESTAMP_FORMAT; + alter_table_arg.nls_formats_[ObNLSFormatEnum::NLS_TIMESTAMP_TZ] = ObTimeConverter::COMPAT_OLD_NLS_TIMESTAMP_TZ_FORMAT; + alter_table_arg.exec_tenant_id_ = tenant_id_; + alter_table_arg.compat_mode_ = lib::Worker::CompatMode::MYSQL; + if (OB_ISNULL(root_service)) { + ret = OB_ERR_SYS; + LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_SWITCH_INDEX_NAME_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); + } else if (OB_FAIL(OTTZ_MGR.get_tenant_tz(tenant_id_, tz_map_wrap))) { + LOG_WARN("get tenant timezone map failed", K(ret), K(tenant_id_)); + } else if (FALSE_IT(alter_table_arg.set_tz_info_map(tz_map_wrap.get_tz_map()))) { + } else if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout(tenant_id_, target_object_id_, rpc_timeout))) { + LOG_WARN("get ddl rpc timeout failed", K(ret)); + } else if (OB_FAIL(root_service->get_ddl_service().get_common_rpc()->to(obrpc::ObRpcProxy::myaddr_).timeout(rpc_timeout). 
+ execute_ddl_task(alter_table_arg, unused_ids))) { + LOG_WARN("fail to swap original and hidden table state", K(ret)); + } else { + LOG_INFO("success to switch and drop old index", K(ret), K(index_drop_task_id_)); + } + if (new_status == next_task_status || OB_FAIL(ret)) { + if (OB_FAIL(switch_status(next_task_status, true, ret))) { + LOG_WARN("fail to switch status", K(ret)); + } + } + } + LOG_DEBUG("switch_index_name finish", K(ret), K(task_id_), K(target_object_id_), K(new_index_id_), K(index_drop_task_id_)); + } + return ret; +} + +int ObRebuildIndexTask::create_and_wait_drop_task_finish(const ObDDLTaskStatus new_status) +{ + int ret = OB_SUCCESS; + DEBUG_SYNC(REBUILD_VEC_INDEX_WAIT_DROP_OLD_INDEX); + // Although the names of the new and old indexes have been swapped, the table ID has not changed, so the old index still needs to be dropped. + const bool is_drop_old_index = true; + bool state_finished = false; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", KR(ret)); + } else if (ObDDLTaskStatus::DROP_SCHEMA != task_status_) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("task status not match", KR(ret), K(task_status_)); + } else if (-1 == index_drop_task_id_ && OB_FAIL(drop_index_impl(is_drop_old_index))) { + LOG_WARN("fail to build drop index task", K(ret)); + } else if (-1 == index_drop_task_id_) { + state_finished = true; + LOG_INFO("submit drop index task return task_id is -1", K(ret), K(index_drop_task_id_)); + } else if (OB_FAIL(check_ddl_task_finish(tenant_id_, index_drop_task_id_, state_finished))) { + LOG_WARN("check drop task finish task failed", K(ret), K(state_finished)); + } + if (state_finished || OB_FAIL(ret)) { + (void)switch_status(new_status, true, ret); + LOG_INFO("rebuild_index_task wait_drop_task_finish finished", KR(ret), K(*this)); + } + return ret; +} + +/* + If the DDL task is completed, the DDL task ID will be reset to zero here. 
+*/ +int ObRebuildIndexTask::check_ddl_task_finish(const int64_t tenant_id, int64_t &child_task_id, bool &is_finished) +{ + int ret = OB_SUCCESS; + is_finished = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", KR(ret)); + } else if (OB_UNLIKELY(child_task_id == OB_INVALID_ID || tenant_id == OB_INVALID_ID)) { + ret = OB_INVALID_ARGUMENT; + LOG_INFO("invalid argument", K(ret), K(child_task_id), K(tenant_id)); + } else { + int64_t unused_user_msg_len = 0; + const int64_t target_object_id = -1; + const ObAddr unused_addr; + ObDDLErrorMessageTableOperator::ObBuildDDLErrorMessage error_message; + if (OB_FAIL(ObDDLErrorMessageTableOperator::get_ddl_error_message(tenant_id, + child_task_id, + target_object_id, + unused_addr, + false /* is_ddl_retry_task */, + *GCTX.sql_proxy_, + error_message, + unused_user_msg_len))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + LOG_DEBUG("ddl task not finish", K(ret), K(tenant_id), K(child_task_id), K(task_id_)); + } else { + LOG_WARN("fail to get ddl error message", K(ret), K(tenant_id), K(child_task_id), K(task_id_)); + } + } else { + ret = error_message.ret_code_; + is_finished = true; + LOG_INFO("succ to wait task finish", K(ret)); + } + } + return ret; +} + +int ObRebuildIndexTask::succ() +{ + return cleanup(); +} + +/* + 1. If the deletion of the old table fails in the above logic, + then wait for the completion of the old table deletion process and no need to drop new table + 2. If it is not because the deletion of the old table failed, then here triggers the drop task of the new table. 
+*/ +int ObRebuildIndexTask::fail() +{ + int ret = OB_SUCCESS; + bool is_drop_old_index = false; + bool is_finished = false; + if (-1 == index_drop_task_id_ && OB_FAIL(drop_index_impl(is_drop_old_index))) { + LOG_WARN("drop index impl failed", KR(ret)); + } else if (-1 == index_drop_task_id_ ) { + is_finished = true; + LOG_INFO("submit drop index task return task_id is -1", K(ret), K(index_drop_task_id_)); + } else if (OB_FAIL(check_ddl_task_finish(tenant_id_, index_drop_task_id_, is_finished))) { + LOG_WARN("fail to check drop index task finished", K(ret)); + } + if (OB_FAIL(ret) || is_finished) { + if (OB_FAIL(cleanup())) { + LOG_WARN("cleanup failed", KR(ret)); + } + } + return ret; +} + +int ObRebuildIndexTask::cleanup_impl() +{ + int ret = OB_SUCCESS; + ObString unused_str; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", KR(ret)); + } else { + ObMultiVersionSchemaService &schema_service = root_service_->get_schema_service(); + ObSchemaGetterGuard schema_guard; + const ObTableSchema *data_schema = nullptr; + ObTableLockOwnerID owner_id; + ObMySQLTransaction trans; + const int64_t old_index_table_id = OB_INVALID_ID; + const int64_t new_index_table_id = OB_INVALID_ID; + const bool is_global_vector_index = false; + if (OB_FAIL(schema_service.get_tenant_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("get tenant schema guard failed", K(ret), K(tenant_id_)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, object_id_, data_schema))) { + LOG_WARN("fail to get table schema", K(ret), K(object_id_)); + } else if (OB_ISNULL(data_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("fail to get table schema", K(ret), KPC(data_schema)); + } else if (OB_FAIL(trans.start(&root_service_->get_sql_proxy(), tenant_id_))) { + LOG_WARN("start transaction failed", K(ret)); + } else if (OB_FAIL(owner_id.convert_from_value(ObLockOwnerType::DEFAULT_OWNER_TYPE, task_id_))) { + LOG_WARN("failed to get owner id", K(ret), K(task_id_)); + } 
else if (OB_FAIL(ObDDLLock::unlock_for_rebuild_index(*data_schema, + old_index_table_id, + new_index_table_id, + is_global_vector_index, + owner_id, + trans))) { + LOG_WARN("failed to unlock rebuild index ddl", K(ret), K(task_id_)); + } + if (trans.is_started()) { + int tmp_ret = trans.end(true/*commit*/); + if (OB_SUCCESS != tmp_ret) { + LOG_WARN("trans end failed", "is_commit", OB_SUCCESS == ret, K(tmp_ret)); + ret = (OB_SUCCESS == ret) ? tmp_ret : ret; + } + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(report_error_code(unused_str))) { + LOG_WARN("report error code failed", KR(ret)); + } else if (OB_FAIL(ObDDLTaskRecordOperator::delete_record(root_service_->get_sql_proxy(), tenant_id_, task_id_))) { + LOG_WARN("delete task record failed", KR(ret), K(task_id_), K(schema_version_)); + } else { + need_retry_ = false; // clean succ, stop the task + } + if (OB_SUCC(ret) && parent_task_id_ > 0) { + const ObDDLTaskID parent_task_id(tenant_id_, parent_task_id_); + root_service_->get_ddl_task_scheduler().on_ddl_task_finish(parent_task_id, get_task_key(), ret_code_, trace_id_); + } + LOG_INFO("clean task finished", KR(ret), K(*this)); + return ret; +} + +int ObRebuildIndexTask::process() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObRebuildIndexTask has not been inited", KR(ret)); + } else if (!need_retry()) { + // task is done + } else { + ddl_tracing_.restore_span_hierarchy(); + const ObDDLTaskStatus status = static_cast(task_status_); + switch (status) { + case ObDDLTaskStatus::PREPARE: + if (OB_FAIL(prepare(REBUILD_SCHEMA))) { + LOG_WARN("prepare failed", KR(ret)); + } + break; + case ObDDLTaskStatus::REBUILD_SCHEMA: + if (OB_FAIL(create_and_wait_rebuild_task_finish(SWITCH_INDEX_NAME))) { + LOG_WARN("rebuild index failed", KR(ret)); + } + break; + case ObDDLTaskStatus::SWITCH_INDEX_NAME: + if (OB_FAIL(switch_index_name(DROP_SCHEMA))) { + LOG_WARN("switch index status failed", K(ret)); + } + break; + case 
ObDDLTaskStatus::DROP_SCHEMA: + if (OB_FAIL(create_and_wait_drop_task_finish(SUCCESS))) { + LOG_WARN("switch index status failed", K(ret)); + } + break; + case ObDDLTaskStatus::SUCCESS: + if (OB_FAIL(succ())) { + LOG_WARN("do succ procedure failed", KR(ret)); + } + break; + case ObDDLTaskStatus::FAIL: + if (OB_FAIL(fail())) { + LOG_WARN("do fail procedure failed", KR(ret)); + } + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error unexpected, task status is not valid", KR(ret), K(task_status_)); + } + ddl_tracing_.release_span_hierarchy(); + if (OB_FAIL(ret)) { + add_event_info("rebuild index task process fail"); + LOG_INFO("rebuild index task process fail", "ddl_event_info", ObDDLEventInfo()); + } + } + return ret; +} + +int ObRebuildIndexTask::deep_copy_index_arg( + common::ObIAllocator &allocator, + const obrpc::ObRebuildIndexArg &src_index_arg, + obrpc::ObRebuildIndexArg &dst_index_arg) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + char *buf = nullptr; + const int64_t serialize_size = src_index_arg.get_serialize_size(); + if (OB_ISNULL(buf = static_cast(allocator.alloc(serialize_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc memory failed", KR(ret), K(serialize_size)); + } else if (OB_FAIL(src_index_arg.serialize(buf, serialize_size, pos))) { + LOG_WARN("serialize source index arg failed", KR(ret)); + } else if (OB_FALSE_IT(pos = 0)) { + } else if (OB_FAIL(dst_index_arg.deserialize(buf, serialize_size, pos))) { + LOG_WARN("deserialize failed", KR(ret)); + } + if (OB_FAIL(ret) && nullptr != buf) { + allocator.free(buf); + } + + return ret; +} + +int ObRebuildIndexTask::serialize_params_to_message(char *buf, const int64_t buf_len, int64_t &pos) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(nullptr == buf || buf_len <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", KR(ret), KP(buf), K(buf_len)); + } else if (OB_FAIL(ObDDLTask::serialize_params_to_message(buf, buf_len, pos))) { + LOG_WARN("ObDDLTask serialize 
failed", KR(ret)); + } else if (OB_FAIL(rebuild_index_arg_.serialize(buf, buf_len, pos))) { + LOG_WARN("serialize failed", KR(ret)); + } else { + LST_DO_CODE(OB_UNIS_ENCODE, index_build_task_id_, index_drop_task_id_, new_index_id_); + } + return ret; +} + +int ObRebuildIndexTask::deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t data_len, int64_t &pos) +{ + int ret = OB_SUCCESS; + obrpc::ObRebuildIndexArg tmp_rebuild_index_arg; + if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || nullptr == buf || data_len <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", KR(ret), K(tenant_id), KP(buf), K(data_len)); + } else if (OB_FAIL(ObDDLTask::deserialize_params_from_message(tenant_id, buf, data_len, pos))) { + LOG_WARN("ObDDLTask deserlize failed", KR(ret)); + } else if (OB_FAIL(tmp_rebuild_index_arg.deserialize(buf, data_len, pos))) { + LOG_WARN("deserialize failed", KR(ret)); + } else if (OB_FAIL(ObDDLUtil::replace_user_tenant_id(tenant_id, tmp_rebuild_index_arg))) { + LOG_WARN("replace user tenant id failed", KR(ret), K(tenant_id), K(tmp_rebuild_index_arg)); + } else if (OB_FAIL(deep_copy_index_arg(allocator_, tmp_rebuild_index_arg, rebuild_index_arg_))) { + LOG_WARN("deep copy drop index arg failed", KR(ret)); + } else { + LST_DO_CODE(OB_UNIS_DECODE, index_build_task_id_, index_drop_task_id_, new_index_id_); + } + return ret; +} + +int64_t ObRebuildIndexTask::get_serialize_param_size() const +{ + int ret = OB_SUCCESS; + int len = 0; + len += ObDDLTask::get_serialize_param_size(); + len += rebuild_index_arg_.get_serialize_size(); + LST_DO_CODE(OB_UNIS_ADD_LEN, + index_build_task_id_, + index_drop_task_id_, + new_index_id_); + return len; +} diff --git a/src/rootserver/ddl_task/ob_rebuild_index_task.h b/src/rootserver/ddl_task/ob_rebuild_index_task.h new file mode 100644 index 0000000000..6031a31074 --- /dev/null +++ b/src/rootserver/ddl_task/ob_rebuild_index_task.h @@ -0,0 +1,99 @@ +/** + * Copyright (c) 2021 OceanBase + 
* OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_ROOTSERVER_OB_REBUILD_INDEX_TASK_H +#define OCEANBASE_ROOTSERVER_OB_REBUILD_INDEX_TASK_H + +#include "rootserver/ddl_task/ob_ddl_task.h" + +namespace oceanbase +{ +namespace rootserver +{ + +class ObRebuildIndexTask : public ObDDLTask +{ +public: + ObRebuildIndexTask(); + virtual ~ObRebuildIndexTask(); + int init( + const uint64_t tenant_id, + const int64_t task_id, + const share::ObDDLType &ddl_type, + const uint64_t data_table_id, + const uint64_t index_table_id, + const int64_t schema_version, + const int64_t parent_task_id, + const int64_t consumer_group_id, + const int32_t sub_task_trace_id, + const int64_t parallelism, + const uint64_t tenant_data_version, + const obrpc::ObRebuildIndexArg &rebuild_index_arg); + int init(const ObDDLTaskRecord &task_record); + virtual int process() override; + virtual bool is_valid() const override; + virtual int serialize_params_to_message(char *buf, const int64_t buf_size, int64_t &pos) const override; + virtual int deserialize_params_from_message(const uint64_t tenant_id, const char *buf, const int64_t buf_size, int64_t &pos) override; + virtual int64_t get_serialize_param_size() const override; + INHERIT_TO_STRING_KV("ObDDLTask", ObDDLTask, KP_(root_service)); + virtual int on_child_task_finish(const uint64_t child_task_key, + const int ret_code) override + { + return OB_SUCCESS; + } + virtual int cleanup_impl() override; + uint64_t get_new_index_id() { return new_index_id_; }; +private: + int check_switch_succ(); + 
int prepare(const share::ObDDLTaskStatus new_status); + int rebuild_index_impl(); + int drop_index_impl(const bool is_old_index); + int switch_index_name(const ObDDLTaskStatus next_task_status); + int create_and_wait_rebuild_task_finish(const share::ObDDLTaskStatus new_status); + int create_and_wait_drop_task_finish(const share::ObDDLTaskStatus new_status); + int succ(); + int fail(); + int update_task_message(); + int check_ddl_task_finish( + const int64_t tenant_id, + int64_t &task_id, + bool &is_finished); + int get_new_index_table_id( + ObSchemaGetterGuard &schema_guard, + const int64_t tenant_id, + const int64_t database_id, + const int64_t data_table_id, + const ObString &index_name, + int64_t &index_id); + int deep_copy_index_arg( + common::ObIAllocator &allocator, + const obrpc::ObRebuildIndexArg &src_index_arg, + obrpc::ObRebuildIndexArg &dst_index_arg); + virtual bool is_error_need_retry(const int ret_code) override + { + UNUSED(ret_code); + // we should always retry on drop index task + return task_status_ < share::ObDDLTaskStatus::DROP_SCHEMA; + } +private: + static const int64_t OB_REBUILD_INDEX_TASK_VERSION = 1; + ObRootService *root_service_; + obrpc::ObRebuildIndexArg rebuild_index_arg_; + int64_t index_build_task_id_; + int64_t index_drop_task_id_; + uint64_t new_index_id_; +}; + +} // end namespace rootserver +} // end namespace oceanbase + +#endif // OCEANBASE_ROOTSERVER_OB_REBUILD_INDEX_TASK_H diff --git a/src/rootserver/ddl_task/ob_vec_index_build_task.cpp b/src/rootserver/ddl_task/ob_vec_index_build_task.cpp new file mode 100644 index 0000000000..702b072e42 --- /dev/null +++ b/src/rootserver/ddl_task/ob_vec_index_build_task.cpp @@ -0,0 +1,1934 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX RS + +#include "rootserver/ddl_task/ob_vec_index_build_task.h" +#include "share/ob_ddl_common.h" +#include "share/ob_ddl_sim_point.h" +#include "share/ob_ddl_error_message_table_operator.h" +#include "rootserver/ob_root_service.h" +#include "rootserver/ob_index_builder.h" +#include "storage/ddl/ob_ddl_lock.h" +#include "share/ob_vec_index_builder_util.h" + +using namespace oceanbase::share; + +namespace oceanbase +{ +namespace rootserver +{ +/*************** ObVecIndexBuildTask *************/ + +ObVecIndexBuildTask::ObVecIndexBuildTask() + : ObDDLTask(ObDDLType::DDL_CREATE_VEC_INDEX), + index_table_id_(target_object_id_), + rowkey_vid_aux_table_id_(OB_INVALID_ID), + vid_rowkey_aux_table_id_(OB_INVALID_ID), + delta_buffer_table_id_(OB_INVALID_ID), + index_id_table_id_(OB_INVALID_ID), + index_snapshot_data_table_id_(OB_INVALID_ID), + rowkey_vid_task_submitted_(false), + vid_rowkey_task_submitted_(false), + delta_buffer_task_submitted_(false), + index_id_task_submitted_(false), + index_snapshot_data_task_submitted_(false), + rowkey_vid_task_id_(0), + vid_rowkey_task_id_(0), + delta_buffer_task_id_(0), + index_id_task_id_(0), + index_snapshot_task_id_(0), + drop_index_task_submitted_(false), + drop_index_task_id_(-1), + is_rebuild_index_(false), + root_service_(nullptr), + create_index_arg_(), + dependent_task_result_map_() +{ +} + +ObVecIndexBuildTask::~ObVecIndexBuildTask() +{ +} + +int ObVecIndexBuildTask::init( + const uint64_t tenant_id, + const int64_t task_id, + const ObTableSchema *data_table_schema, + const ObTableSchema *index_schema, + const int64_t schema_version, + const int64_t 
parallelism, + const int64_t consumer_group_id, + const obrpc::ObCreateIndexArg &create_index_arg, + const uint64_t tenant_data_version, + const int64_t parent_task_id /* = 0 */, + const int64_t task_status /* PREPARE */, + const int64_t snapshot_version) +{ + int ret = OB_SUCCESS; + const bool is_rebuild_index = create_index_arg.is_rebuild_index_; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else if (OB_ISNULL(root_service_ = GCTX.root_service_)) { + ret = OB_ERR_SYS; + LOG_WARN("root_service is null", K(ret), KP(root_service_)); + } else if (!root_service_->in_service()) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("root service not in service", K(ret)); + } else if (OB_UNLIKELY(tenant_id == OB_INVALID_TENANT_ID || + task_id <= 0 || + OB_ISNULL(data_table_schema) || + OB_ISNULL(index_schema) || + schema_version <= 0 || + parallelism <= 0 || + consumer_group_id < 0 || + !create_index_arg.is_valid() || + !(tenant_data_version > 0) || + task_status < ObDDLTaskStatus::PREPARE || + task_status > ObDDLTaskStatus::SUCCESS || + snapshot_version < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(tenant_id), K(task_id), + KPC(data_table_schema), KPC(index_schema), K(schema_version), K(parallelism), + K(consumer_group_id), K(create_index_arg.is_valid()), K(create_index_arg), + K(task_status), K(snapshot_version), K(is_rebuild_index)); + } else if (OB_FAIL(deep_copy_index_arg(allocator_, + create_index_arg, + create_index_arg_))) { + LOG_WARN("fail to copy create index arg", K(ret), K(create_index_arg)); + } else { + set_gmt_create(ObTimeUtility::current_time()); + tenant_id_ = tenant_id; + task_id_ = task_id; + schema_version_ = schema_version; + parallelism_ = parallelism; + consumer_group_id_ = consumer_group_id; + parent_task_id_ = parent_task_id; + if (snapshot_version > 0) { + snapshot_version_ = snapshot_version; + } + object_id_ = data_table_schema->get_table_id(); + target_object_id_ = 
index_schema->get_table_id(); + index_table_id_ = index_schema->get_table_id(); + create_index_arg_.exec_tenant_id_ = tenant_id; + if (index_schema->is_vec_rowkey_vid_type()) { + rowkey_vid_aux_table_id_ = index_table_id_; + } else if (index_schema->is_vec_delta_buffer_type()) { + delta_buffer_table_id_ = index_table_id_; + } + task_version_ = OB_VEC_INDEX_BUILD_TASK_VERSION; + start_time_ = ObTimeUtility::current_time(); + data_format_version_ = tenant_data_version; + is_rebuild_index_ = is_rebuild_index; + if (OB_FAIL(ret)) { + } else if (FALSE_IT(task_status_ = static_cast(task_status))) { + } else if (OB_FAIL(init_ddl_task_monitor_info(index_schema->get_table_id()))) { + LOG_WARN("init ddl task monitor info failed", K(ret)); + } else { + dst_tenant_id_ = tenant_id_; + dst_schema_version_ = schema_version_; + is_inited_ = true; + } + } + return ret; +} + +int ObVecIndexBuildTask::init(const ObDDLTaskRecord &task_record) +{ + int ret = OB_SUCCESS; + const uint64_t data_table_id = task_record.object_id_; + const uint64_t index_table_id = task_record.target_object_id_; + const int64_t schema_version = task_record.schema_version_; + int64_t pos = 0; + const char *ddl_type_str = nullptr; + const char *target_name = nullptr; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else if (OB_ISNULL(root_service_ = GCTX.root_service_)) { + ret = OB_ERR_SYS; + LOG_WARN("root_service is null", K(ret), KP(root_service_)); + } else if (!root_service_->in_service()) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("root service not in service", K(ret)); + } else if (!task_record.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(task_record)); + } else if (OB_FAIL(deserialize_params_from_message(task_record.tenant_id_, + task_record.message_.ptr(), + task_record.message_.length(), + pos))) { + LOG_WARN("deserialize params from message failed", K(ret)); + } else { + tenant_id_ = task_record.tenant_id_; + 
task_id_ = task_record.task_id_; + schema_version_ = schema_version; + parent_task_id_ = task_record.parent_task_id_; + task_status_ = static_cast(task_record.task_status_); + snapshot_version_ = task_record.snapshot_version_; + object_id_ = data_table_id; + target_object_id_ = index_table_id; + index_table_id_ = index_table_id; + execution_id_ = task_record.execution_id_; + ret_code_ = task_record.ret_code_; + start_time_ = ObTimeUtility::current_time(); + dst_tenant_id_ = tenant_id_; + dst_schema_version_ = schema_version_; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(init_ddl_task_monitor_info(index_table_id))) { + LOG_WARN("init ddl task monitor info failed", K(ret)); + } else { + is_inited_ = true; + // set up span during recover task + ddl_tracing_.open_for_recovery(); + } + } + return ret; +} + +int ObVecIndexBuildTask::process() +{ + int ret = OB_SUCCESS; + ObIndexType index_type = create_index_arg_.index_type_; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(check_health())) { + LOG_WARN("check health failed", K(ret)); + } else if (!share::schema::is_vec_index(index_type)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expect index type is of vec index", K(ret), K(index_type)); + } else if (!need_retry()) { + // by pass + } else { + // switch case for diff create_index_arg, since there are 5 aux tables + ddl_tracing_.restore_span_hierarchy(); + const ObDDLTaskStatus status = static_cast(task_status_); + switch (status) { + case ObDDLTaskStatus::PREPARE: { + if (OB_FAIL(prepare())) { + LOG_WARN("prepare failed", K(ret), K(*this)); + } + break; + } + case ObDDLTaskStatus::GENERATE_ROWKEY_VID_SCHEMA: { + if (OB_FAIL(prepare_rowkey_vid_table())) { + LOG_WARN("generate schema failed", K(ret), K(*this)); + } + break; + } + case ObDDLTaskStatus::WAIT_ROWKEY_VID_TABLE_COMPLEMENT: { + if (OB_FAIL(wait_aux_table_complement())) { + LOG_WARN("wait rowkey_vid table complement failed", K(ret), K(*this)); + } + break; + 
} + case ObDDLTaskStatus::GENERATE_VEC_AUX_SCHEMA: { + if (OB_FAIL(prepare_aux_index_tables())) { + LOG_WARN("generate schema failed", K(ret), K(*this)); + } + break; + } + case ObDDLTaskStatus::WAIT_VEC_AUX_TABLE_COMPLEMENT: { + if (OB_FAIL(wait_aux_table_complement())) { + LOG_WARN("wait aux table complement failed", K(ret), K(*this)); + } + break; + } + case ObDDLTaskStatus::GENERATE_VID_ROWKEY_SCHEMA: { + if (OB_FAIL(prepare_vid_rowkey_table())) { + LOG_WARN("generate schema failed", K(ret), K(*this)); + } + break; + } + case ObDDLTaskStatus::WAIT_VID_ROWKEY_TABLE_COMPLEMENT: { + if (OB_FAIL(wait_aux_table_complement())) { + LOG_WARN("wait aux table complement failed", K(ret), K(*this)); + } + break; + } + case ObDDLTaskStatus::VALIDATE_CHECKSUM: { + if (OB_FAIL(validate_checksum())) { + LOG_WARN("validate checksum failed", K(ret), K(*this)); + } + break; + } + case ObDDLTaskStatus::FAIL: { + if (OB_FAIL(clean_on_failed())) { + LOG_WARN("clean failed_task failed", K(ret), K(*this)); + } + break; + } + case ObDDLTaskStatus::SUCCESS: { + if (OB_FAIL(succ())) { + LOG_WARN("clean task on finish failed", K(ret), K(*this)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("not expected status", K(ret), K(status), K(*this)); + } + } // end switch + ddl_tracing_.release_span_hierarchy(); + } + return ret; +} + +bool ObVecIndexBuildTask::is_valid() const +{ + return is_inited_ && !trace_id_.is_invalid(); +} + +int ObVecIndexBuildTask::deep_copy_index_arg( + common::ObIAllocator &allocator, + const obrpc::ObCreateIndexArg &source_arg, + obrpc::ObCreateIndexArg &dest_arg) +{ + int ret = OB_SUCCESS; + const int64_t serialize_size = source_arg.get_serialize_size(); + char *buf = nullptr; + int64_t pos = 0; + if (OB_ISNULL(buf = static_cast(allocator.alloc(serialize_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(serialize_size)); + } else if (OB_FAIL(source_arg.serialize(buf, serialize_size, pos))) { + 
LOG_WARN("serialize alter table arg", K(ret)); + } else if (FALSE_IT(pos = 0)) { + } else if (OB_FAIL(dest_arg.deserialize(buf, serialize_size, pos))) { + LOG_WARN("deserialize alter table arg failed", K(ret)); + } + if (OB_FAIL(ret) && nullptr != buf) { + allocator.free(buf); + } + return ret; +} + +int ObVecIndexBuildTask::check_health() +{ + int ret = OB_SUCCESS; + const ObDDLTaskStatus status = static_cast(task_status_); + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (!root_service_->in_service()) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("root service not in service, not need retry", K(ret)); + need_retry_ = false; // only stop run the task, need not clean up task context + } else if (OB_FAIL(refresh_status())) { // refresh task status + LOG_WARN("refresh status failed", K(ret)); + } else if (OB_FAIL(refresh_schema_version())) { + LOG_WARN("refresh schema version failed", K(ret)); + } else if (status == ObDDLTaskStatus::FAIL && drop_index_task_submitted_) { + /*already failed, and have submitted drop index task, do nothing*/ + } else { + ObMultiVersionSchemaService &schema_service = root_service_->get_schema_service(); + ObSchemaGetterGuard schema_guard; + const ObTableSchema *index_schema = nullptr; + bool is_data_table_exist = false; + bool is_all_indexes_exist = false; + if (OB_FAIL(schema_service.get_tenant_schema_guard(tenant_id_, + schema_guard))) { + LOG_WARN("get tenant schema guard failed", K(ret), K(tenant_id_)); + } else if (OB_FAIL(schema_guard.check_table_exist(tenant_id_, + object_id_, + is_data_table_exist))) { + LOG_WARN("check data table exist failed", K(ret), K(tenant_id_), K(object_id_)); + } else if (OB_FAIL(check_aux_table_schemas_exist(is_all_indexes_exist))) { + LOG_WARN("check aux index table exist failed", K(ret), K(tenant_id_)); + } else if (status != ObDDLTaskStatus::FAIL && (!is_data_table_exist || !is_all_indexes_exist)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("data table or index 
table not exist", K(ret), K(is_data_table_exist), + K(is_all_indexes_exist)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, + index_table_id_, + index_schema))) { + LOG_WARN("get table schema failed", K(ret), K(tenant_id_), K(index_table_id_)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_SCHEMA_ERROR; + LOG_WARN("fail to get index_schema", K(ret), K(index_table_id_)); + } else if (ObIndexStatus::INDEX_STATUS_INDEX_ERROR == index_schema->get_index_status()) { + ret = OB_SUCCESS == ret_code_ ? OB_ERR_ADD_INDEX : ret_code_; + LOG_WARN("index status error", K(ret), K(index_table_id_), + K(index_schema->get_index_status())); + } + #ifdef ERRSIM + if (OB_SUCC(ret)) { + ret = check_errsim_error(); + } + #endif + if (OB_FAIL(ret) && !ObIDDLTask::in_ddl_retry_white_list(ret)) { + const ObDDLTaskStatus old_status = static_cast(task_status_); + const ObDDLTaskStatus new_status = ObDDLTaskStatus::FAIL; + (void)switch_status(new_status, false, ret); + LOG_WARN("switch status to build_failed", K(ret), KP(this), K(old_status), K(new_status)); + } + if (ObDDLTaskStatus::FAIL == static_cast(task_status_) || + ObDDLTaskStatus::SUCCESS == static_cast(task_status_)) { + ret = OB_SUCCESS; // allow clean up + } + } + check_ddl_task_execute_too_long(); + return ret; +} + +int ObVecIndexBuildTask::check_aux_table_schemas_exist(bool &is_all_exist) +{ + int ret = OB_SUCCESS; + is_all_exist = false; + const ObDDLTaskStatus status = static_cast(task_status_); + ObMultiVersionSchemaService &schema_service = root_service_->get_schema_service(); + ObSchemaGetterGuard schema_guard; + const ObTableSchema *index_schema = nullptr; + if (OB_FAIL(schema_service.get_tenant_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("get tenant schema guard failed", K(ret), K(tenant_id_)); + } else { + bool rowkey_vid_exist = true; + bool vid_rowkey_exist = true; + bool delta_buffer_aux_exist = true; + bool index_id_exist = true; + bool index_snapshot_data_exist = true; + if (status <= 
ObDDLTaskStatus::GENERATE_VEC_AUX_SCHEMA) { + is_all_exist = true; + if (OB_INVALID_ID != rowkey_vid_aux_table_id_) { + if (!is_rebuild_index_ && + OB_FAIL(schema_guard.check_table_exist(tenant_id_, + rowkey_vid_aux_table_id_, + rowkey_vid_exist))) { + LOG_WARN("check rowkey vid table exist failed", K(ret), K(tenant_id_), + K(rowkey_vid_aux_table_id_)); + } else { + is_all_exist &= rowkey_vid_exist; + } + } + if (OB_INVALID_ID != delta_buffer_table_id_) { + if (OB_FAIL(schema_guard.check_table_exist(tenant_id_, + delta_buffer_table_id_, + delta_buffer_aux_exist))) { + LOG_WARN("check delta buf index aux table exist failed", K(ret), K(tenant_id_), + K(delta_buffer_table_id_)); + } else { + is_all_exist &= delta_buffer_aux_exist; + } + } + } else if (status <= ObDDLTaskStatus::WAIT_VID_ROWKEY_TABLE_COMPLEMENT) { + if (!is_rebuild_index_ && + OB_FAIL(schema_guard.check_table_exist(tenant_id_, + rowkey_vid_aux_table_id_, + rowkey_vid_exist))) { + LOG_WARN("check rowkey_vid table exist failed", K(ret), K(tenant_id_), + K(rowkey_vid_aux_table_id_), K(status)); + } else if (OB_FAIL(schema_guard.check_table_exist(tenant_id_, + delta_buffer_table_id_, + delta_buffer_aux_exist))) { + LOG_WARN("check delta buffer table exist failed", K(ret), K(tenant_id_), + K(delta_buffer_table_id_), K(status)); + } else if (OB_FAIL(schema_guard.check_table_exist(tenant_id_, + index_id_table_id_, + index_id_exist))) { + LOG_WARN("check index id table exist failed", K(ret), K(tenant_id_), + K(index_id_table_id_), K(status)); + } else { + // is_all_exist = rowkey_vid_exist; + is_all_exist = (rowkey_vid_exist && delta_buffer_aux_exist + && index_id_exist); + } + } else if (status == ObDDLTaskStatus::VALIDATE_CHECKSUM) { + if (!is_rebuild_index_ && + OB_FAIL(schema_guard.check_table_exist(tenant_id_, + rowkey_vid_aux_table_id_, + rowkey_vid_exist))) { + LOG_WARN("check rowkey vid table exist failed", K(ret), K(tenant_id_), + K(rowkey_vid_aux_table_id_), K(status)); + } else if 
(!is_rebuild_index_ && + OB_FAIL(schema_guard.check_table_exist(tenant_id_, + vid_rowkey_aux_table_id_, + vid_rowkey_exist))) { + LOG_WARN("check vid rowkey table exist failed", K(ret), K(tenant_id_), + K(vid_rowkey_aux_table_id_), K(status)); + } else if (OB_FAIL(schema_guard.check_table_exist(tenant_id_, + delta_buffer_table_id_, + delta_buffer_aux_exist))) { + LOG_WARN("check delta buffer table exist failed", K(ret), K(tenant_id_), + K(delta_buffer_table_id_), K(status)); + } else if (OB_FAIL(schema_guard.check_table_exist(tenant_id_, + index_id_table_id_, + index_id_exist))) { + LOG_WARN("check index id table exist failed", K(ret), K(tenant_id_), + K(index_id_table_id_), K(status)); + } else if (OB_FAIL(schema_guard.check_table_exist(tenant_id_, + index_snapshot_data_table_id_, + index_snapshot_data_exist))) { + LOG_WARN("check index snapshot table exist failed", K(ret), K(tenant_id_), + K(index_snapshot_data_table_id_), K(status)); + } else { + is_all_exist = (rowkey_vid_exist && vid_rowkey_exist && + delta_buffer_aux_exist && index_id_exist && + index_snapshot_data_exist); + } + } + if (!is_all_exist) { + LOG_WARN("vec aux table not exist", K(status), K(rowkey_vid_exist), + K(vid_rowkey_exist), K(delta_buffer_aux_exist), + K(index_id_exist), K(index_snapshot_data_exist), K(status), + K(rowkey_vid_aux_table_id_), K(vid_rowkey_aux_table_id_), + K(delta_buffer_table_id_), K(index_id_table_id_), + K(index_snapshot_data_table_id_)); + } + } + return ret; +} + +int ObVecIndexBuildTask::get_next_status(share::ObDDLTaskStatus &next_status) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else { + ObIndexType index_type = create_index_arg_.index_type_; + const ObDDLTaskStatus status = static_cast(task_status_); + switch (status) { + case ObDDLTaskStatus::PREPARE: { + next_status = ObDDLTaskStatus::GENERATE_ROWKEY_VID_SCHEMA; + break; + } + case ObDDLTaskStatus::GENERATE_ROWKEY_VID_SCHEMA: { + 
next_status = ObDDLTaskStatus::WAIT_ROWKEY_VID_TABLE_COMPLEMENT; + break; + } + case ObDDLTaskStatus::WAIT_ROWKEY_VID_TABLE_COMPLEMENT: { + next_status = ObDDLTaskStatus::GENERATE_VEC_AUX_SCHEMA; + break; + } + case ObDDLTaskStatus::GENERATE_VEC_AUX_SCHEMA: { + next_status = ObDDLTaskStatus::WAIT_VEC_AUX_TABLE_COMPLEMENT; + break; + } + case ObDDLTaskStatus::WAIT_VEC_AUX_TABLE_COMPLEMENT: { + next_status = ObDDLTaskStatus::GENERATE_VID_ROWKEY_SCHEMA; + break; + } + case ObDDLTaskStatus::GENERATE_VID_ROWKEY_SCHEMA: { + next_status = ObDDLTaskStatus::WAIT_VID_ROWKEY_TABLE_COMPLEMENT; + break; + } + case ObDDLTaskStatus::WAIT_VID_ROWKEY_TABLE_COMPLEMENT: { + next_status = ObDDLTaskStatus::VALIDATE_CHECKSUM; + break; + } + case ObDDLTaskStatus::VALIDATE_CHECKSUM: { + next_status = ObDDLTaskStatus::SUCCESS; + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("not expected status", K(ret), K(status), K(*this)); + } + } // end switch + } + return ret; +} + +int ObVecIndexBuildTask::prepare() +{ + int ret = OB_SUCCESS; + bool state_finished = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (ObDDLTaskStatus::PREPARE != task_status_) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("task status not match", K(ret), K(task_status_)); + } else { + state_finished = true; + } + + if (state_finished && OB_SUCC(ret)) { + ObDDLTaskStatus next_status; + if (OB_FAIL(get_next_status(next_status))) { + LOG_WARN("failed to get next status", K(ret)); + } else { + (void)switch_status(next_status, true, ret); + LOG_INFO("prepare finished", K(ret), K(parent_task_id_), K(task_id_), K(*this)); + } + } + return ret; +} + +int ObVecIndexBuildTask::prepare_aux_table(const ObIndexType index_type, + bool &task_submitted, + uint64_t &aux_table_id, + int64_t &res_task_id) +{ + int ret = OB_SUCCESS; + SMART_VAR(obrpc::ObCreateIndexArg, index_arg) { + if (OB_FAIL(construct_create_index_arg(index_type, index_arg))) { + LOG_WARN("failed to 
construct rowkey doc id arg", K(ret)); + } else if (OB_FAIL(ObDomainIndexBuilderUtil::prepare_aux_table(task_submitted, + aux_table_id, + res_task_id, + lock_, + object_id_, + tenant_id_, + task_id_, + index_arg, + root_service_, + dependent_task_result_map_, + obrpc::ObRpcProxy::myaddr_, + OB_VEC_INDEX_BUILD_CHILD_TASK_NUM))) { + LOG_WARN("fail to prepare_aux_table", K(ret), K(index_type)); + } else if (OB_FAIL(update_task_message())) { + LOG_WARN("fail to update task message", K(ret), K(index_type)); + } + } // samart var + return ret; +} + +int ObVecIndexBuildTask::prepare_rowkey_vid_table() +{ + int ret = OB_SUCCESS; + bool state_finished = false; + const ObIndexType index_type = ObIndexType::INDEX_TYPE_VEC_ROWKEY_VID_LOCAL; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (ObDDLTaskStatus::GENERATE_ROWKEY_VID_SCHEMA != task_status_) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("task status not match", K(ret), K(task_status_)); + } else if (is_rebuild_index_) { + LOG_DEBUG("skip prepare_rowkey_vid_table, is rebuild index"); + } else if (OB_FAIL(prepare_aux_table(index_type, + rowkey_vid_task_submitted_, + rowkey_vid_aux_table_id_, + rowkey_vid_task_id_))) { + LOG_WARN("failed to prepare rowkey vid aux table", K(ret), K(index_type), + K(rowkey_vid_task_submitted_), K(rowkey_vid_aux_table_id_)); + } + if (OB_SUCC(ret) && (rowkey_vid_task_submitted_ || is_rebuild_index_)) { + state_finished = true; + } + if (state_finished && OB_SUCC(ret)) { + ObDDLTaskStatus next_status; + if (OB_FAIL(get_next_status(next_status))) { + LOG_WARN("failed to get next status", K(ret)); + } else { + (void)switch_status(next_status, true, ret); + LOG_INFO("generate schema finished", K(ret), K(parent_task_id_), K(task_id_), + K(*this)); + } + } + return ret; +} + +int ObVecIndexBuildTask::prepare_aux_index_tables() +{ + int ret = OB_SUCCESS; + bool state_finished = false; + const ObIndexType aux_delta_buffer_type = 
ObIndexType::INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL; + const ObIndexType aux_index_id_type = ObIndexType::INDEX_TYPE_VEC_INDEX_ID_LOCAL; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (ObDDLTaskStatus::GENERATE_VEC_AUX_SCHEMA != task_status_) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("task status not match", K(ret), K(task_status_)); + } else if (OB_FAIL(prepare_aux_table(aux_delta_buffer_type, + delta_buffer_task_submitted_, + delta_buffer_table_id_, + delta_buffer_task_id_))) { + LOG_WARN("failed to prepare delta buffer aux table", K(ret), + K(delta_buffer_task_submitted_), K(delta_buffer_table_id_)); + } else if (OB_FAIL(prepare_aux_table(aux_index_id_type, + index_id_task_submitted_, + index_id_table_id_, + index_id_task_id_))) { + LOG_WARN("failed to prepare index id aux table", K(ret), + K(index_id_task_submitted_), K(index_id_table_id_)); + } + if (OB_SUCC(ret) && delta_buffer_task_submitted_ && index_id_task_submitted_) { + state_finished = true; + } + if (state_finished && OB_SUCC(ret)) { + ObDDLTaskStatus next_status; + if (OB_FAIL(get_next_status(next_status))) { + LOG_WARN("failed to get next status", K(ret)); + } else { + (void)switch_status(next_status, true, ret); + LOG_INFO("generate schema finished", K(ret), K(parent_task_id_), K(task_id_), + K(*this)); + } + } + return ret; +} + +int ObVecIndexBuildTask::construct_create_index_arg( + const ObIndexType index_type, + obrpc::ObCreateIndexArg &arg) +{ + int ret = OB_SUCCESS; + if (share::schema::is_vec_rowkey_vid_type(index_type)) { + if (OB_FAIL(construct_rowkey_vid_arg(arg))) { + LOG_WARN("failed to construct rowkey vid arg", K(ret)); + } + } else if (share::schema::is_vec_vid_rowkey_type(index_type)) { + if (OB_FAIL(construct_vid_rowkey_arg(arg))) { + LOG_WARN("failed to construct vid rowkey arg", K(ret)); + } + } else if (share::schema::is_vec_delta_buffer_type(index_type)) { + if (OB_FAIL(construct_delta_buffer_arg(arg))) { + LOG_WARN("failed to 
construct delta buf index aux arg", K(ret)); + } + } else if (share::schema::is_vec_index_id_type(index_type)) { + if (OB_FAIL(construct_index_id_arg(arg))) { + LOG_WARN("failed to construct index id aux table arg", K(ret)); + } + } else if (share::schema::is_vec_index_snapshot_data_type(index_type)) { + if (OB_FAIL(construct_index_snapshot_data_arg(arg))) { + LOG_WARN("failed to construct snapshot aux table arg", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("undexpected index type", K(ret), K(index_type)); + } + return ret; +} + +int ObVecIndexBuildTask::prepare_vid_rowkey_table() +{ + int ret = OB_SUCCESS; + bool state_finished = false; + const ObIndexType aux_vid_rowkey_type = ObIndexType::INDEX_TYPE_VEC_VID_ROWKEY_LOCAL; + const ObIndexType aux_index_snapshot_type = ObIndexType::INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (ObDDLTaskStatus::GENERATE_VID_ROWKEY_SCHEMA != task_status_) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("task status not match", K(ret), K(task_status_)); + } else if (!is_rebuild_index_ && + OB_FAIL(prepare_aux_table(aux_vid_rowkey_type, + vid_rowkey_task_submitted_, + vid_rowkey_aux_table_id_, + vid_rowkey_task_id_))) { + LOG_WARN("failed to prepare aux vid rowkey table", K(ret), + K(vid_rowkey_task_submitted_), K(vid_rowkey_aux_table_id_)); + } else if (OB_FAIL(prepare_aux_table(aux_index_snapshot_type, + index_snapshot_data_task_submitted_, + index_snapshot_data_table_id_, + index_snapshot_task_id_))) { + LOG_WARN("failed to prepare index snapshot aux table", K(ret), + K(index_snapshot_data_task_submitted_), K(index_snapshot_data_table_id_)); + } + if (OB_SUCC(ret) && + (vid_rowkey_task_submitted_ || is_rebuild_index_) && index_snapshot_data_task_submitted_) { + state_finished = true; + } + if (state_finished && OB_SUCC(ret)) { + ObDDLTaskStatus next_status; + if (OB_FAIL(get_next_status(next_status))) { + LOG_WARN("failed to 
get next status", K(ret)); + } else { + (void)switch_status(next_status, true, ret); + LOG_INFO("generate schema finished", K(ret), K(parent_task_id_), K(task_id_), + K(*this)); + } + } + return ret; +} + +int ObVecIndexBuildTask::construct_rowkey_vid_arg(obrpc::ObCreateIndexArg &arg) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(deep_copy_index_arg(allocator_, create_index_arg_, arg))) { + LOG_WARN("failed to deep copy index arg", K(ret)); + } else if (FALSE_IT(arg.index_type_ = INDEX_TYPE_VEC_ROWKEY_VID_LOCAL)) { + } else if (FALSE_IT(arg.index_option_.parser_name_.reset())) { + } else if (OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator_, arg.index_type_, arg.index_name_, arg.index_name_))) { + LOG_WARN("failed to generate index name", K(ret)); + } + return ret; +} + +int ObVecIndexBuildTask::construct_vid_rowkey_arg(obrpc::ObCreateIndexArg &arg) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(deep_copy_index_arg(allocator_, create_index_arg_, arg))) { + LOG_WARN("failed to deep copy index arg", K(ret)); + } else if (FALSE_IT(arg.index_type_ = INDEX_TYPE_VEC_VID_ROWKEY_LOCAL)) { + } else if (FALSE_IT(arg.index_option_.parser_name_.reset())) { + } else if (OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator_, arg.index_type_, arg.index_name_, arg.index_name_))) { + LOG_WARN("failed to generate index name", K(ret)); + } + return ret; +} + +int ObVecIndexBuildTask::construct_delta_buffer_arg(obrpc::ObCreateIndexArg &arg) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(deep_copy_index_arg(allocator_, create_index_arg_, arg))) { + LOG_WARN("failed to deep copy index arg", K(ret)); + } else if (FALSE_IT(arg.index_type_ = INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL)) { + } else if (FALSE_IT(arg.index_option_.parser_name_.reset())) { + } else if (OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator_, arg.index_type_, arg.index_name_, arg.index_name_))) { + LOG_WARN("failed to generate index name", K(ret)); + } + return ret; +} + +int 
ObVecIndexBuildTask::construct_index_id_arg(obrpc::ObCreateIndexArg &arg)
+{
+  // Builds the create-index argument for the index-id aux table: deep-copy
+  // create_index_arg_ into `arg`, set the aux index type, reset the parser
+  // name, then regenerate arg.index_name_ through
+  // ObVecIndexBuilderUtil::generate_vec_index_name().
+  int ret = OB_SUCCESS;
+  if (OB_FAIL(deep_copy_index_arg(allocator_, create_index_arg_, arg))) {
+    LOG_WARN("failed to deep copy index arg", K(ret));
+  } else if (FALSE_IT(arg.index_type_ = INDEX_TYPE_VEC_INDEX_ID_LOCAL)) {
+  } else if (FALSE_IT(arg.index_option_.parser_name_.reset())) {
+  } else if (OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator_, arg.index_type_, arg.index_name_, arg.index_name_))) {
+    LOG_WARN("failed to generate index name", K(ret));
+  }
+  return ret;
+}
+
+// Same construction as above, targeting the index-snapshot-data aux table.
+int ObVecIndexBuildTask::construct_index_snapshot_data_arg(obrpc::ObCreateIndexArg &arg)
+{
+  int ret = OB_SUCCESS;
+  if (OB_FAIL(deep_copy_index_arg(allocator_, create_index_arg_, arg))) {
+    LOG_WARN("failed to deep copy index arg", K(ret));
+  } else if (FALSE_IT(arg.index_type_ = INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL)) {
+  } else if (FALSE_IT(arg.index_option_.parser_name_.reset())) {
+  } else if (OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator_, arg.index_type_, arg.index_name_, arg.index_name_))) {
+    LOG_WARN("failed to generate index name", K(ret));
+  }
+  return ret;
+}
+
+// Stores `aux_table_id` into the member that corresponds to the index type
+// carried by `create_index_arg` (rowkey-vid, vid-rowkey, delta-buffer,
+// index-id or index-snapshot-data). Any other index type leaves all members
+// untouched and still returns OB_SUCCESS, as before.
+// Returns OB_INVALID_ARGUMENT when `create_index_arg` is null.
+int ObVecIndexBuildTask::record_index_table_id(
+    const obrpc::ObCreateIndexArg *create_index_arg,
+    uint64_t &aux_table_id)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(create_index_arg)) {
+    // Guard the dereference below instead of crashing on a null argument.
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument, create index arg is null", K(ret), KP(create_index_arg));
+  } else {
+    const ObIndexType index_type = create_index_arg->index_type_;
+    if (share::schema::is_vec_rowkey_vid_type(index_type)) {
+      rowkey_vid_aux_table_id_ = aux_table_id;
+    } else if (share::schema::is_vec_vid_rowkey_type(index_type)) {
+      vid_rowkey_aux_table_id_ = aux_table_id;
+    } else if (share::schema::is_vec_delta_buffer_type(index_type)) {
+      delta_buffer_table_id_ = aux_table_id;
+    } else if (share::schema::is_vec_index_id_type(index_type)) {
+      index_id_table_id_ = aux_table_id;
+    } else if (share::schema::is_vec_index_snapshot_data_type(index_type)) {
+      index_snapshot_data_table_id_ = aux_table_id;
+    }
+  }
+  return ret;
+}
+
+int
ObVecIndexBuildTask::get_index_table_id(
+    const obrpc::ObCreateIndexArg *create_index_arg,
+    uint64_t &index_table_id)
+{
+  // Copies into `index_table_id` the aux table id member that corresponds to
+  // the index type carried by `create_index_arg`. For an index type that is
+  // none of the five vec aux types the output is left unchanged and
+  // OB_SUCCESS is returned, as before.
+  // Returns OB_INVALID_ARGUMENT when `create_index_arg` is null.
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(create_index_arg)) {
+    // Guard the dereference below instead of crashing on a null argument.
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument, create index arg is null", K(ret), KP(create_index_arg));
+  } else {
+    const ObIndexType index_type = create_index_arg->index_type_;
+    if (share::schema::is_vec_rowkey_vid_type(index_type)) {
+      index_table_id = rowkey_vid_aux_table_id_;
+    } else if (share::schema::is_vec_vid_rowkey_type(index_type)) {
+      index_table_id = vid_rowkey_aux_table_id_;
+    } else if (share::schema::is_vec_delta_buffer_type(index_type)) {
+      index_table_id = delta_buffer_table_id_;
+    } else if (share::schema::is_vec_index_id_type(index_type)) {
+      index_table_id = index_id_table_id_;
+    } else if (share::schema::is_vec_index_snapshot_data_type(index_type)) {
+      index_table_id = index_snapshot_data_table_id_;
+    }
+  }
+  return ret;
+}
+
+int ObVecIndexBuildTask::CheckTaskStatusFn::operator()(common::hash::HashMapPair &entry)
+{
+  int ret = OB_SUCCESS;
+  if (child_task_failed_ || state_finished_) {
+    // do nothing
+  } else {
+    const uint64_t task_key = entry.first;
+    const int64_t target_object_id = -1;
+    const int64_t child_task_id = entry.second.task_id_;
+    if (entry.second.ret_code_ == INT64_MAX) {
+      // maybe ddl already finish when switching rs
+      HEAP_VAR(ObDDLErrorMessageTableOperator::ObBuildDDLErrorMessage, error_message) {
+        int64_t unused_user_msg_len = 0;
+        ObAddr unused_addr;
+        if (OB_FAIL(ObDDLErrorMessageTableOperator::get_ddl_error_message(
+                                                         dest_tenant_id_,
+                                                         child_task_id,
+                                                         target_object_id,
+                                                         unused_addr,
+                                                         false, //is_ddl_retry_task
+                                                         *GCTX.sql_proxy_,
+                                                         error_message,
+                                                         unused_user_msg_len))) {
+          if (OB_ENTRY_NOT_EXIST == ret) {
+            ret = OB_SUCCESS;
+            LOG_INFO("ddl task not finish", K(dest_tenant_id_), K(task_key),
+                K(child_task_id), K(target_object_id));
+          } else {
+            LOG_WARN("fail to get ddl error message", K(ret), K(task_key),
+                K(child_task_id), K(target_object_id));
+          }
+        } else {
+          finished_task_cnt_++;
+          if (error_message.ret_code_ != OB_SUCCESS) {
+            ret = error_message.ret_code_;
+            child_task_failed_ =
true; + state_finished_ = true; + } + } + } + } else { + finished_task_cnt_++; + if (entry.second.ret_code_ != OB_SUCCESS) { + ret = entry.second.ret_code_; + child_task_failed_ = true; + state_finished_ = true; + } + } + } + return ret; +} + +// wait data complement of aux index tables +int ObVecIndexBuildTask::wait_aux_table_complement() +{ + int ret = OB_SUCCESS; + bool child_task_failed = false; + bool state_finished = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (ObDDLTaskStatus::WAIT_ROWKEY_VID_TABLE_COMPLEMENT != task_status_ && + ObDDLTaskStatus::WAIT_VEC_AUX_TABLE_COMPLEMENT != task_status_ && + ObDDLTaskStatus::WAIT_VID_ROWKEY_TABLE_COMPLEMENT != task_status_) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("task status not match", K(ret), K(task_status_)); + } else if (is_rebuild_index_ && + (ObDDLTaskStatus::WAIT_ROWKEY_VID_TABLE_COMPLEMENT == task_status_)) { + state_finished = true; + LOG_DEBUG("rebuild index, no share table rebuild, no need to wait", K(task_status_)); + } else { + int64_t finished_task_cnt = 0; + CheckTaskStatusFn check_task_status_fn(dependent_task_result_map_, finished_task_cnt, child_task_failed, state_finished, dst_tenant_id_); + if (OB_FAIL(dependent_task_result_map_.foreach_refactored(check_task_status_fn))) { + if (OB_ITER_END != ret) { + LOG_WARN("foreach refactored failed", K(ret), K(dst_tenant_id_), K(child_task_failed)); + if (!child_task_failed) { + LOG_WARN("check status failed, but child_task_failed is false, check reason!", K(ret), K(dst_tenant_id_), K(child_task_failed)); + } + } else { + ret = OB_SUCCESS; // reach max dump count + } + } + if (finished_task_cnt == dependent_task_result_map_.size() || OB_FAIL(ret)) { + // 1. all child tasks finish. + // 2. the parent task exits if any child task fails. 
+ state_finished = true; + } + } + if (state_finished) { + ObDDLTaskStatus next_status; + if (child_task_failed) { + if (!ObIDDLTask::in_ddl_retry_white_list(ret)) { + const ObDDLTaskStatus old_status = static_cast(task_status_); + const ObDDLTaskStatus new_status = ObDDLTaskStatus::FAIL; + (void)switch_status(new_status, false, ret); + ret = OB_SUCCESS; // allow clean up + } + } else if (OB_SUCC(ret)) { + if (OB_FAIL(get_next_status(next_status))) { + LOG_WARN("failed to get next status", K(ret)); + } else { + (void)switch_status(next_status, true, ret); + LOG_INFO("wait aux table complement finished", K(ret), K(parent_task_id_), + K(task_id_), K(*this)); + } + } + } + return ret; +} +
+// Record the result code reported by a finished child task.
+//
+// Looks up the entry stored under child_task_key in dependent_task_result_map_,
+// writes ret_code into its ret_code_ field and stores the entry back. lock_ is
+// held (TCWLockGuard) for the whole read-modify-write.
+//
+// @param child_task_key  key of the child task in dependent_task_result_map_
+// @param ret_code        result code reported by the child task
+// @return OB_SUCCESS on success;
+//         OB_NOT_INIT if this task has not been initialised;
+//         OB_INVALID_ARGUMENT if child_task_key is OB_INVALID_ID;
+//         OB_ENTRY_NOT_EXIST if no entry is registered under child_task_key;
+//         OB_ERR_UNEXPECTED if a different result code was already recorded;
+//         otherwise the error returned by the hash map.
+int ObVecIndexBuildTask::on_child_task_finish(
+    const uint64_t child_task_key,
+    const int ret_code)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(!is_inited_)) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("ObVecIndexBuildTask has not been inited", K(ret));
+  } else if (OB_UNLIKELY(common::OB_INVALID_ID == child_task_key)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), K(child_task_key));
+  } else {
+    TCWLockGuard guard(lock_);
+    share::ObDomainDependTaskStatus status;
+    if (OB_FAIL(dependent_task_result_map_.get_refactored(child_task_key,
+                                                          status))) {
+      if (OB_HASH_NOT_EXIST == ret) {
+        ret = OB_ENTRY_NOT_EXIST;
+      }
+      LOG_WARN("get from dependent_task_result_map failed", K(ret),
+          K(child_task_key));
+    } else {
+      // The previously recorded code must come from the looked-up entry. The
+      // old code compared against a local that was always INT64_MAX, so the
+      // duplicate-report guard below could never fire.
+      // INT64_MAX is taken to mean "no result recorded yet", matching the
+      // ret_code_ == INT64_MAX test in CheckTaskStatusFn.
+      const int64_t org_ret = status.ret_code_;
+      if (org_ret != INT64_MAX && org_ret != ret_code) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("error unexpected, ddl result triggers twice", K(ret),
+            K(child_task_key), K(org_ret), K(ret_code));
+      } else if (FALSE_IT(status.ret_code_ = ret_code)) {
+      } else if (OB_FAIL(dependent_task_result_map_.set_refactored(child_task_key,
+                                                                   status,
+                                                                   true/*overwrite*/))) {
+        LOG_WARN("set dependent_task_result_map failed", K(ret), K(child_task_key));
+      } else {
+        LOG_INFO("child task finish successfully", K(child_task_key));
+      }
+    }
+  }
+  return ret;
+}
+
+int
ObVecIndexBuildTask::update_index_status_in_schema( + const ObTableSchema &index_schema, + const ObIndexStatus new_status) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else { + obrpc::ObUpdateIndexStatusArg arg; + arg.index_table_id_ = index_schema.get_table_id(); + arg.status_ = new_status; + arg.exec_tenant_id_ = tenant_id_; + arg.in_offline_ddl_white_list_ = true; + arg.task_id_ = task_id_; + int64_t ddl_rpc_timeout = 0; + int64_t tmp_timeout = 0; + if (INDEX_STATUS_AVAILABLE == new_status) { + const bool is_create_index_syntax = create_index_arg_.ddl_stmt_str_.trim().prefix_match_ci("create"); + if (create_index_arg_.ddl_stmt_str_.empty()) { + // alter table syntax. + } else if (OB_UNLIKELY(!is_create_index_syntax)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), "ddl_stmt_str", + create_index_arg_.ddl_stmt_str_, K(create_index_arg_)); + } else { + // For create index syntax, create_index_arg_ will record the user sql, + // and generate the ddl_stmt_str when anabling index. + // For alter table add index syntax, create_index_arg_ will not record + // the user sql, and generate the ddl_stmt_str when generating index schema. 
+ arg.ddl_stmt_str_ = create_index_arg_.ddl_stmt_str_; + } + } + + DEBUG_SYNC(BEFORE_UPDATE_GLOBAL_INDEX_STATUS); + obrpc::ObCommonRpcProxy *common_rpc = nullptr; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout(index_schema.get_all_part_num(), + ddl_rpc_timeout))) { + LOG_WARN("get ddl rpc timeout fail", K(ret)); + } else if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout(tenant_id_, + index_schema.get_data_table_id(), + tmp_timeout))) { + LOG_WARN("get ddl rpc timeout fail", K(ret)); + } else if (OB_FALSE_IT(ddl_rpc_timeout += tmp_timeout)) { + } else if (OB_FALSE_IT(common_rpc = root_service_->get_ddl_service().get_common_rpc())) { + } else if (OB_ISNULL(common_rpc)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("common rpc is nullptr", K(ret)); + } else if (OB_FAIL(common_rpc->to(GCTX.self_addr()).timeout(ddl_rpc_timeout). + update_index_status(arg))) { + LOG_WARN("update index status failed", K(ret), K(arg)); + } else { + LOG_INFO("notify index status changed finish", K(new_status), + K(arg.index_table_id_), K(ddl_rpc_timeout), "ddl_stmt_str", arg.ddl_stmt_str_); + } + } + return ret; +} + +int ObVecIndexBuildTask::serialize_params_to_message( + char *buf, + const int64_t buf_len, + int64_t &pos) const +{ + int ret = OB_SUCCESS; + int8_t rowkey_vid_submitted = static_cast(rowkey_vid_task_submitted_); + int8_t vid_rowkey_submitted = static_cast(vid_rowkey_task_submitted_); + int8_t delta_buffer_task_submitted = static_cast(delta_buffer_task_submitted_); + int8_t index_id_task_submitted = static_cast(index_id_task_submitted_); + int8_t index_snapshot_data_task_submitted = static_cast(index_snapshot_data_task_submitted_); + int8_t drop_index_submitted = static_cast(drop_index_task_submitted_); + int8_t is_rebuild_index = static_cast(is_rebuild_index_); + + if (OB_UNLIKELY(nullptr == buf || buf_len <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(buf), K(buf_len)); + } else if 
(OB_FAIL(ObDDLTask::serialize_params_to_message(buf, buf_len, pos))) { + LOG_WARN("ObDDLTask serialize failed", K(ret)); + } else if (OB_FAIL(create_index_arg_.serialize(buf, buf_len, pos))) { + LOG_WARN("serialize create index arg failed", K(ret)); + } else if (OB_FAIL(serialization::encode(buf, + buf_len, + pos, + rowkey_vid_aux_table_id_))) { + LOG_WARN("serialize rowkey vec table id failed", K(ret)); + } else if (OB_FAIL(serialization::encode(buf, + buf_len, + pos, + vid_rowkey_aux_table_id_))) { + LOG_WARN("serialize vid rowkey table id failed", K(ret)); + } else if (OB_FAIL(serialization::encode(buf, + buf_len, + pos, + delta_buffer_table_id_))) { + LOG_WARN("serialize delta buffer index table id failed", K(ret)); + } else if (OB_FAIL(serialization::encode(buf, + buf_len, + pos, + index_id_table_id_))) { + LOG_WARN("serialize index id table id failed", K(ret)); + } else if (OB_FAIL(serialization::encode(buf, + buf_len, + pos, + index_snapshot_data_table_id_))) { + LOG_WARN("serialize snapshot table id failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i8(buf, + buf_len, + pos, + rowkey_vid_submitted))) { + LOG_WARN("serialize rowkey vid task submitted failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i8(buf, + buf_len, + pos, + vid_rowkey_submitted))) { + LOG_WARN("serialize vid rowkey task submitted failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i8(buf, + buf_len, + pos, + delta_buffer_task_submitted))) { + LOG_WARN("serialize delta buf task submitted failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i8(buf, + buf_len, + pos, + index_id_task_submitted))) { + LOG_WARN("serialize index id task submitted failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i8(buf, + buf_len, + pos, + index_snapshot_data_task_submitted))) { + LOG_WARN("serialize snapshot task submitted failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i64(buf, + buf_len, + pos, + rowkey_vid_task_id_))) { + LOG_WARN("serialize 
rowkey vid task id failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i64(buf, + buf_len, + pos, + vid_rowkey_task_id_))) { + LOG_WARN("serialize vid rowkey task id failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i64(buf, + buf_len, + pos, + delta_buffer_task_id_))) { + LOG_WARN("serialize delta buf task id failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i64(buf, + buf_len, + pos, + index_id_task_id_))) { + LOG_WARN("serialize index id task id failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i64(buf, + buf_len, + pos, + index_snapshot_task_id_))) { + LOG_WARN("serialize index snapshot task id failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i8(buf, + buf_len, + pos, + drop_index_submitted))) { + LOG_WARN("serialize drop vec index task submitted failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i64(buf, + buf_len, + pos, + drop_index_task_id_))) { + LOG_WARN("serialize drop index task id failed", K(ret)); + } else if (OB_FAIL(serialization::encode_i8(buf, + buf_len, + pos, + is_rebuild_index))) { + LOG_WARN("serialize drop index task id failed", K(ret)); + } + return ret; +} + +int ObVecIndexBuildTask::deserialize_params_from_message( + const uint64_t tenant_id, + const char *buf, + const int64_t data_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + int8_t rowkey_vid_submitted = 0; + int8_t vid_rowkey_submitted = 0; + int8_t delta_buffer_task_submitted = 0; + int8_t index_id_task_submitted = 0; + int8_t index_snapshot_data_task_submitted = 0; + int8_t drop_index_submitted = 0; + int8_t is_rebuild_index = 0; + obrpc::ObCreateIndexArg tmp_arg; + if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || + nullptr == buf || + data_len <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(tenant_id), KP(buf), K(data_len)); + } else if (OB_FAIL(ObDDLTask::deserialize_params_from_message(tenant_id, + buf, + data_len, + pos))) { + LOG_WARN("ObDDLTask deserlize failed", K(ret)); + } 
else if (OB_FAIL(tmp_arg.deserialize(buf, data_len, pos))) { + LOG_WARN("deserialize table failed", K(ret)); + } else if (OB_FAIL(ObDDLUtil::replace_user_tenant_id(tenant_id, tmp_arg))) { + LOG_WARN("replace user tenant id failed", K(ret), K(tenant_id), K(tmp_arg)); + } else if (OB_FAIL(deep_copy_table_arg(allocator_, tmp_arg, create_index_arg_))) { + LOG_WARN("deep copy create index arg failed", K(ret)); + } else if (OB_FAIL(serialization::decode(buf, + data_len, pos, rowkey_vid_aux_table_id_))) { + LOG_WARN("fail to deserialize rowkey vid table id", K(ret)); + } else if (OB_FAIL(serialization::decode(buf, + data_len, + pos, + vid_rowkey_aux_table_id_))) { + LOG_WARN("fail to deserialize vid rowkey table id", K(ret)); + } else if (OB_FAIL(serialization::decode(buf, + data_len, + pos, + delta_buffer_table_id_))) { + LOG_WARN("fail to deserialize delta buf index aux table id", K(ret)); + } else if (OB_FAIL(serialization::decode(buf, + data_len, + pos, + index_id_table_id_))) { + LOG_WARN("fail to deserialize index id table id", K(ret)); + } else if (OB_FAIL(serialization::decode(buf, + data_len, + pos, + index_snapshot_data_table_id_))) { + LOG_WARN("fail to deserialize snapthot table id", K(ret)); + } else if (OB_FAIL(serialization::decode_i8(buf, + data_len, + pos, + &rowkey_vid_submitted))) { + LOG_WARN("fail to deserialize rowkey vid task submmitted", K(ret)); + } else if (OB_FAIL(serialization::decode_i8(buf, + data_len, + pos, + &vid_rowkey_submitted))) { + LOG_WARN("fail to deserialize vid rowkey task submmitted", K(ret)); + } else if (OB_FAIL(serialization::decode_i8(buf, + data_len, + pos, + &delta_buffer_task_submitted))) { + LOG_WARN("fail to deserialize vid index aux task submmitted", K(ret)); + } else if (OB_FAIL(serialization::decode_i8(buf, + data_len, + pos, + &index_id_task_submitted))) { + LOG_WARN("fail to deserialize index id task submmitted", K(ret)); + } else if (OB_FAIL(serialization::decode_i8(buf, + data_len, + pos, + 
&index_snapshot_data_task_submitted))) { + LOG_WARN("fail to deserialize snapshot task submmitted", K(ret)); + } else if (OB_FAIL(serialization::decode_i64(buf, + data_len, + pos, + &rowkey_vid_task_id_))) { + LOG_WARN("fail to deserialize rowkey vid task id", K(ret)); + } else if (OB_FAIL(serialization::decode_i64(buf, + data_len, + pos, + &vid_rowkey_task_id_))) { + LOG_WARN("fail to deserialize vid rowkey task id", K(ret)); + } else if (OB_FAIL(serialization::decode_i64(buf, + data_len, + pos, + &delta_buffer_task_id_))) { + LOG_WARN("fail to deserialize delta buffer index aux task id", K(ret)); + } else if (OB_FAIL(serialization::decode_i64(buf, + data_len, + pos, + &index_id_task_id_))) { + LOG_WARN("fail to deserialize index id task id", K(ret)); + } else if (OB_FAIL(serialization::decode_i64(buf, + data_len, + pos, + &index_snapshot_task_id_))) { + LOG_WARN("fail to deserialize index sanpshot id task id", K(ret)); + } else if (OB_FAIL(serialization::decode_i8(buf, + data_len, + pos, + &drop_index_submitted))) { + LOG_WARN("fail to deserialize drop vec index task submmitted", K(ret)); + } else if (OB_FAIL(serialization::decode_i64(buf, + data_len, + pos, + &drop_index_task_id_))) { + LOG_WARN("fail to deserialize drop vec index task id", K(ret)); + } else if (OB_FAIL(serialization::decode_i8(buf, + data_len, + pos, + &is_rebuild_index))) { + LOG_WARN("fail to deserialize is_rebuild_index", K(ret)); + } else if (!dependent_task_result_map_.created() && + OB_FAIL(dependent_task_result_map_.create(OB_VEC_INDEX_BUILD_CHILD_TASK_NUM, + lib::ObLabel("DepTasMap")))) { + LOG_WARN("create dependent task map failed", K(ret)); + } else { + rowkey_vid_task_submitted_ = rowkey_vid_submitted; + vid_rowkey_task_submitted_ = vid_rowkey_submitted; + delta_buffer_task_submitted_ = delta_buffer_task_submitted; + index_id_task_submitted_ = index_id_task_submitted; + index_snapshot_data_task_submitted_ = index_snapshot_data_task_submitted; + drop_index_task_submitted_ = 
drop_index_submitted; + is_rebuild_index_ = is_rebuild_index; + if (rowkey_vid_task_id_ != 0) { + share::ObDomainDependTaskStatus rowkey_vid_status; + rowkey_vid_status.task_id_ = rowkey_vid_task_id_; + if (OB_FAIL(dependent_task_result_map_.set_refactored(rowkey_vid_aux_table_id_, + rowkey_vid_status))) { + LOG_WARN("set dependent task map failed", K(ret), K(rowkey_vid_aux_table_id_), + K(rowkey_vid_status)); + } + } + if (OB_SUCC(ret) && vid_rowkey_task_id_ != 0) { + share::ObDomainDependTaskStatus vid_rowkey_status; + vid_rowkey_status.task_id_ = vid_rowkey_task_id_; + if (OB_FAIL(dependent_task_result_map_.set_refactored(vid_rowkey_aux_table_id_, + vid_rowkey_status))) { + LOG_WARN("set dependent task map failed", K(ret), K(vid_rowkey_aux_table_id_), + K(vid_rowkey_status)); + } + } + if (OB_SUCC(ret) && delta_buffer_task_id_ != 0) { + share::ObDomainDependTaskStatus delta_buf_aux_status; + delta_buf_aux_status.task_id_ = delta_buffer_task_id_; + if (OB_FAIL(dependent_task_result_map_.set_refactored(delta_buffer_table_id_, + delta_buf_aux_status))) { + LOG_WARN("set dependent task map failed", K(ret), K(delta_buffer_table_id_), + K(delta_buf_aux_status)); + } + } + if (OB_SUCC(ret) && index_id_task_id_ != 0) { + share::ObDomainDependTaskStatus index_id_aux_status; + index_id_aux_status.task_id_ = index_id_task_id_; + if (OB_FAIL(dependent_task_result_map_.set_refactored(index_id_table_id_, + index_id_aux_status))) { + LOG_WARN("set dependent task map failed", K(ret), K(index_id_table_id_), + K(index_id_aux_status)); + } + } + if (OB_SUCC(ret) && index_snapshot_task_id_ != 0) { + share::ObDomainDependTaskStatus index_snapshot_aux_status; + index_snapshot_aux_status.task_id_ = index_snapshot_task_id_; + if (OB_FAIL(dependent_task_result_map_.set_refactored(index_snapshot_data_table_id_, + index_snapshot_aux_status))) { + LOG_WARN("set dependent task map failed", K(ret), K(index_snapshot_data_table_id_), + K(index_snapshot_aux_status)); + } + } + } + return ret; 
+} + +int64_t ObVecIndexBuildTask::get_serialize_param_size() const +{ + int8_t rowkey_vid_submitted = static_cast(rowkey_vid_task_submitted_); + int8_t vid_rowkey_submitted = static_cast(vid_rowkey_task_submitted_); + int8_t delta_buffer_task_submitted = static_cast(delta_buffer_task_submitted_); + int8_t index_id_task_submitted = static_cast(index_id_task_submitted_); + int8_t index_snapshot_data_task_submitted = static_cast(index_snapshot_data_task_submitted_); + int8_t drop_index_submitted = static_cast(drop_index_task_submitted_); + int8_t is_rebuild_index = static_cast(is_rebuild_index_); + return create_index_arg_.get_serialize_size() + + ObDDLTask::get_serialize_param_size() + + serialization::encoded_length(rowkey_vid_aux_table_id_) + + serialization::encoded_length(vid_rowkey_aux_table_id_) + + serialization::encoded_length(delta_buffer_table_id_) + + serialization::encoded_length(index_id_table_id_) + + serialization::encoded_length(index_snapshot_data_table_id_) + + serialization::encoded_length_i8(rowkey_vid_submitted) + + serialization::encoded_length_i8(vid_rowkey_submitted) + + serialization::encoded_length_i8(delta_buffer_task_submitted) + + serialization::encoded_length_i8(index_id_task_submitted) + + serialization::encoded_length_i8(index_snapshot_data_task_submitted) + + serialization::encoded_length_i64(rowkey_vid_task_id_) + + serialization::encoded_length_i64(vid_rowkey_task_id_) + + serialization::encoded_length_i64(delta_buffer_task_id_) + + serialization::encoded_length_i64(index_id_task_id_) + + serialization::encoded_length_i64(index_snapshot_task_id_) + + serialization::encoded_length_i8(drop_index_submitted) + + serialization::encoded_length_i64(drop_index_task_id_) + + serialization::encoded_length_i8(is_rebuild_index); +} + +int ObVecIndexBuildTask::print_child_task_ids(char *buf, int64_t len) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(buf)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should not be null", K(ret)); + } else { + int64_t 
pos = 0; + MEMSET(buf, 0, len); + TCRLockGuard guard(lock_); + common::hash::ObHashMap ::const_iterator iter = + dependent_task_result_map_.begin(); + if (OB_FAIL(databuff_printf(buf, len, pos, "[ "))) { + LOG_WARN("failed to print", K(ret)); + } else { + while (OB_SUCC(ret) && iter != dependent_task_result_map_.end()) { + const int64_t child_task_id = iter->second.task_id_; + if (OB_FAIL(databuff_printf(buf, + len, + pos, + "%ld ", + child_task_id))) { + LOG_WARN("failed to print", K(ret)); + } + ++iter; + } + if (OB_SUCC(ret)) { + databuff_printf(buf, len, pos, "]"); + } + } + } + return ret; +} + +int ObVecIndexBuildTask::collect_longops_stat(ObLongopsValue &value) +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + const ObDDLTaskStatus status = static_cast(task_status_); + databuff_printf(stat_info_.message_, MAX_LONG_OPS_MESSAGE_LENGTH, pos, "TENANT_ID: %ld, TASK_ID: %ld, ", tenant_id_, task_id_); + switch(status) { + case ObDDLTaskStatus::PREPARE: { + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: PREPARE"))) { + LOG_WARN("failed to print", K(ret), K(rowkey_vid_aux_table_id_), K(rowkey_vid_task_submitted_), + K(vid_rowkey_aux_table_id_), K(vid_rowkey_task_submitted_), + K(delta_buffer_table_id_), K(delta_buffer_task_submitted_), + K(index_id_table_id_), K(index_id_task_submitted_), + K(index_snapshot_data_table_id_), K(index_id_task_submitted_)); + } + break; + } + case ObDDLTaskStatus::GENERATE_ROWKEY_VID_SCHEMA: { + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: GENERATE_ROWKEY_VID_SCHEMA"))) { + LOG_WARN("failed to print", K(ret), K(rowkey_vid_aux_table_id_), K(rowkey_vid_task_submitted_), + K(vid_rowkey_aux_table_id_), K(vid_rowkey_task_submitted_), + K(delta_buffer_table_id_), K(delta_buffer_task_submitted_), + K(index_id_table_id_), K(index_id_task_submitted_), + K(index_snapshot_data_table_id_), K(index_id_task_submitted_)); + } + break; + } + case 
ObDDLTaskStatus::WAIT_ROWKEY_VID_TABLE_COMPLEMENT: { + char child_task_ids[MAX_LONG_OPS_MESSAGE_LENGTH]; + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: WAIT_ROWKEY_VID_TABLE_COMPLEMENT"))) { + LOG_WARN("failed to print", K(ret), K(rowkey_vid_aux_table_id_), K(rowkey_vid_task_submitted_), + K(vid_rowkey_aux_table_id_), K(vid_rowkey_task_submitted_), + K(delta_buffer_table_id_), K(delta_buffer_task_submitted_), + K(index_id_table_id_), K(index_id_task_submitted_), + K(index_snapshot_data_table_id_), K(index_id_task_submitted_)); + } else if (OB_FAIL(print_child_task_ids(child_task_ids, MAX_LONG_OPS_MESSAGE_LENGTH))) { + if (ret == OB_SIZE_OVERFLOW) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get all child task ids", K(ret)); + } + } + break; + } + case ObDDLTaskStatus::GENERATE_VEC_AUX_SCHEMA: { + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: GENERATE_VEC_AUX_SCHEMA"))) { + LOG_WARN("failed to print", K(ret), K(rowkey_vid_aux_table_id_), K(rowkey_vid_task_submitted_), + K(vid_rowkey_aux_table_id_), K(vid_rowkey_task_submitted_), + K(delta_buffer_table_id_), K(delta_buffer_task_submitted_), + K(index_id_table_id_), K(index_id_task_submitted_), + K(index_snapshot_data_table_id_), K(index_id_task_submitted_)); + } + break; + } + case ObDDLTaskStatus::WAIT_VEC_AUX_TABLE_COMPLEMENT: { + char child_task_ids[MAX_LONG_OPS_MESSAGE_LENGTH]; + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: WAIT_VEC_AUX_TABLE_COMPLEMENT"))) { + LOG_WARN("failed to print", K(ret), K(rowkey_vid_aux_table_id_), K(rowkey_vid_task_submitted_), + K(vid_rowkey_aux_table_id_), K(vid_rowkey_task_submitted_), + K(delta_buffer_table_id_), K(delta_buffer_task_submitted_), + K(index_id_table_id_), K(index_id_task_submitted_), + K(index_snapshot_data_table_id_), K(index_id_task_submitted_)); + } else if 
(OB_FAIL(print_child_task_ids(child_task_ids, MAX_LONG_OPS_MESSAGE_LENGTH))) { + if (ret == OB_SIZE_OVERFLOW) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get all child task ids", K(ret)); + } + } + break; + } + case ObDDLTaskStatus::GENERATE_VID_ROWKEY_SCHEMA: { + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: GENERATE_VID_ROWKEY_SCHEMA"))) { + LOG_WARN("failed to print", K(ret), K(rowkey_vid_aux_table_id_), K(rowkey_vid_task_submitted_), + K(vid_rowkey_aux_table_id_), K(vid_rowkey_task_submitted_), + K(delta_buffer_table_id_), K(delta_buffer_task_submitted_), + K(index_id_table_id_), K(index_id_task_submitted_), + K(index_snapshot_data_table_id_), K(index_id_task_submitted_)); + } + break; + } + case ObDDLTaskStatus::WAIT_VID_ROWKEY_TABLE_COMPLEMENT: { + char child_task_ids[MAX_LONG_OPS_MESSAGE_LENGTH]; + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: WAIT_VID_ROWKEY_TABLE_COMPLEMENT"))) { + LOG_WARN("failed to print", K(ret), K(rowkey_vid_aux_table_id_), K(rowkey_vid_task_submitted_), + K(vid_rowkey_aux_table_id_), K(vid_rowkey_task_submitted_), + K(delta_buffer_table_id_), K(delta_buffer_task_submitted_), + K(index_id_table_id_), K(index_id_task_submitted_), + K(index_snapshot_data_table_id_), K(index_id_task_submitted_)); + } else if (OB_FAIL(print_child_task_ids(child_task_ids, MAX_LONG_OPS_MESSAGE_LENGTH))) { + if (ret == OB_SIZE_OVERFLOW) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get all child task ids", K(ret)); + } + } + break; + } + case ObDDLTaskStatus::VALIDATE_CHECKSUM: { + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: VALIDATE_CHECKSUM"))) { + LOG_WARN("failed to print", K(ret), K(rowkey_vid_aux_table_id_), K(rowkey_vid_task_submitted_), + K(vid_rowkey_aux_table_id_), K(vid_rowkey_task_submitted_), + K(delta_buffer_table_id_), K(delta_buffer_task_submitted_), + 
K(index_id_table_id_), K(index_id_task_submitted_), + K(index_snapshot_data_table_id_), K(index_id_task_submitted_)); + } + break; + } + case ObDDLTaskStatus::FAIL: { + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: FAIL"))) { + LOG_WARN("failed to print", K(ret), K(rowkey_vid_aux_table_id_), K(rowkey_vid_task_submitted_), + K(vid_rowkey_aux_table_id_), K(vid_rowkey_task_submitted_), + K(delta_buffer_table_id_), K(delta_buffer_task_submitted_), + K(index_id_table_id_), K(index_id_task_submitted_), + K(index_snapshot_data_table_id_), K(index_id_task_submitted_)); + } + break; + } + case ObDDLTaskStatus::SUCCESS: { + if (OB_FAIL(databuff_printf(stat_info_.message_, + MAX_LONG_OPS_MESSAGE_LENGTH, + pos, + "STATUS: SUCCESS"))) { + LOG_WARN("failed to print", K(ret), K(rowkey_vid_aux_table_id_), K(rowkey_vid_task_submitted_), + K(vid_rowkey_aux_table_id_), K(vid_rowkey_task_submitted_), + K(delta_buffer_table_id_), K(delta_buffer_task_submitted_), + K(index_id_table_id_), K(index_id_task_submitted_), + K(index_snapshot_data_table_id_), K(index_id_task_submitted_)); + } + break; + } + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("not expected status", K(ret), K(status), K(*this)); + break; + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_COLLECT_LONGOPS_STAT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); + } else if (OB_FAIL(copy_longops_stat(value))) { + LOG_WARN("failed to collect common longops stat", K(ret)); + } + + return ret; +} + +int ObVecIndexBuildTask::ChangeTaskStatusFn::operator()(common::hash::HashMapPair &entry) +{ + int ret = OB_SUCCESS; + const uint64_t task_key = entry.first; + const int64_t target_object_id = -1; + const int64_t child_task_id = entry.second.task_id_; + if (entry.second.ret_code_ == INT64_MAX) { + // maybe ddl already finish when switching rs + HEAP_VAR(ObDDLErrorMessageTableOperator::ObBuildDDLErrorMessage, 
error_message) { + int64_t unused_user_msg_len = 0; + ObAddr unused_addr; + if (OB_FAIL(ObDDLErrorMessageTableOperator::get_ddl_error_message( + dest_tenant_id_, + child_task_id, + target_object_id, + unused_addr, + false /* is_ddl_retry_task */, + *GCTX.sql_proxy_, + error_message, + unused_user_msg_len))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // ongoing child task + ret = OB_SUCCESS; + ObMySQLTransaction trans; + if (OB_FAIL(trans.start(&rt_service_->get_sql_proxy(), + dest_tenant_id_))) { + LOG_WARN("start transaction failed", K(ret)); + } else if (OB_FAIL(ObDDLTaskRecordOperator::update_task_status( + trans, dest_tenant_id_, child_task_id, ObDDLTaskStatus::FAIL))) { + LOG_WARN("update child task status failed", K(ret), K(child_task_id)); + } else { + int tmp_ret = trans.end(true/*commit*/); + if (OB_SUCCESS != tmp_ret) { + ret = (OB_SUCCESS == ret) ? tmp_ret : ret; + } + LOG_INFO("cancel not finished ddl task", K(dest_tenant_id_), + K(task_key), K(child_task_id), K(target_object_id)); + } + } else { + LOG_WARN("fail to get ddl error message", K(ret), K(task_key), + K(child_task_id), K(target_object_id)); + } + } + } + } + return ret; +} + +int ObVecIndexBuildTask::clean_on_failed() +{ + int ret = OB_SUCCESS; + bool state_finished = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (ObDDLTaskStatus::FAIL != task_status_) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("task status not match", K(ret), K(task_status_)); + } else { + // 1. cancel ongoing build index task + ChangeTaskStatusFn change_statu_fn(dependent_task_result_map_, dst_tenant_id_, root_service_); + if (OB_FAIL(dependent_task_result_map_.foreach_refactored(change_statu_fn))) { + if (OB_ITER_END != ret) { + LOG_WARN("foreach refactored failed", K(ret), K(dst_tenant_id_)); + } else { + ret = OB_SUCCESS; // reach max dump count + } + } + // 2. 
drop already built index + if (OB_FAIL(ret)) { + } else if (!drop_index_task_submitted_) { + if (OB_FAIL(submit_drop_vec_index_task())) { + LOG_WARN("failed to drop vec index", K(ret)); + } + } else { + bool drop_index_finished = false; + if (OB_FAIL(wait_drop_index_finish(drop_index_finished))) { + LOG_WARN("failed to wait drop index task finish", K(ret)); + } else if (drop_index_finished) { + state_finished = true; + } + } + } + // judge index status to choose clean_on_failed() and drop index + if (OB_SUCC(ret) && state_finished) { + if (OB_FAIL(cleanup())) { + LOG_WARN("cleanup failed", K(ret)); + } + } + return ret; +} +
+// Submit a drop-index request for the vector index built by this task.
+//
+// The index to drop is delta_buffer_table_id_ when that id is valid, otherwise
+// index_table_id_. The function resolves the index, database and data table
+// schemas, fills an ObDropIndexArg and sends it through the common RPC proxy.
+// On success it sets drop_index_task_submitted_, stores the returned task id
+// in drop_index_task_id_ and persists the task message.
+//
+// If the index schema is already in the recycle bin, nothing is submitted and
+// OB_SUCCESS is returned.
+//
+// @return OB_SUCCESS on success;
+//         OB_BAD_NULL_ERROR if root_service_ is null;
+//         OB_ERR_UNEXPECTED if the index table does not exist or a required
+//         schema is null;
+//         OB_SCHEMA_ERROR if the index schema pointer is null;
+//         otherwise the error of the failing schema or RPC call.
+int ObVecIndexBuildTask::submit_drop_vec_index_task()
+{
+  int ret = OB_SUCCESS;
+  ObSchemaGetterGuard schema_guard;
+  const ObTableSchema *index_table_schema = nullptr;
+  const ObDatabaseSchema *database_schema = nullptr;
+  const ObTableSchema *data_table_schema = nullptr;
+  ObString index_name;
+  bool is_index_exist = true;
+  const bool has_aux_table = (delta_buffer_table_id_ != OB_INVALID_ID);
+  const uint64_t index_table_id = has_aux_table ? delta_buffer_table_id_ : index_table_id_;
+  // root_service_ must be checked before its first dereference. The old code
+  // bound a schema service reference from it in a local initialiser, ahead of
+  // this guard.
+  if (OB_ISNULL(root_service_)) {
+    ret = OB_BAD_NULL_ERROR;
+    LOG_WARN("should not be null", K(ret));
+  } else if (OB_FAIL(root_service_->get_schema_service().get_tenant_schema_guard(tenant_id_, schema_guard))) {
+    LOG_WARN("get tenant schema guard failed", K(ret), K(tenant_id_));
+  } else if (OB_FAIL(schema_guard.check_table_exist(tenant_id_, index_table_id, is_index_exist))) {
+    LOG_WARN("check table exist failed", K(ret), K_(tenant_id), K(index_table_id));
+  } else if (!is_index_exist) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("vec index aux schema is nullptr, fail to roll back", K(ret));
+  } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, index_table_id, index_table_schema))) {
+    LOG_WARN("get index schema failed", K(ret), K(tenant_id_), K(index_table_id));
+  } else if (OB_ISNULL(index_table_schema)) {
+    ret = OB_SCHEMA_ERROR;
+    LOG_WARN("index schema is null", K(ret), K(index_table_id));
+  } else if (index_table_schema->is_in_recyclebin()) {
+    // the index has been dropped, just finish this task
+  } else if (OB_FAIL(index_table_schema->get_index_name(index_name))) {
+    LOG_WARN("get index name failed", KR(ret), K(index_table_schema->get_table_type()), KPC(index_table_schema));
+  } else if (OB_FAIL(schema_guard.get_database_schema(tenant_id_, index_table_schema->get_database_id(), database_schema))) {
+    LOG_WARN("get database schema failed", KR(ret), K(index_table_schema->get_database_id()));
+  } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, index_table_schema->get_data_table_id(), data_table_schema))) {
+    LOG_WARN("get data table schema failed", KR(ret), K(index_table_schema->get_data_table_id()));
+  } else if (OB_UNLIKELY(nullptr == database_schema || nullptr == data_table_schema)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get null schema", KR(ret), KP(database_schema), KP(data_table_schema));
+  } else {
+    int64_t ddl_rpc_timeout = 0;
+    obrpc::ObDropIndexArg drop_index_arg;
+    obrpc::ObDropIndexRes drop_index_res;
+    drop_index_arg.is_inner_ = true;
+    drop_index_arg.tenant_id_ = tenant_id_;
+    drop_index_arg.exec_tenant_id_ = tenant_id_;
+    drop_index_arg.index_table_id_ = index_table_id;
+    drop_index_arg.session_id_ = data_table_schema->get_session_id();
+    drop_index_arg.index_name_ = index_name;
+    drop_index_arg.table_name_ = data_table_schema->get_table_name();
+    drop_index_arg.database_name_ = database_schema->get_database_name_str();
+    drop_index_arg.index_action_type_ = obrpc::ObIndexArg::DROP_INDEX;
+    drop_index_arg.is_add_to_scheduler_ = true;
+    drop_index_arg.task_id_ = task_id_;
+    // if want to drop only one index, is_vec_inner_drop_ should be false, else should be true.
+    drop_index_arg.is_vec_inner_drop_ = has_aux_table;
+    if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout(index_table_schema->get_all_part_num() + data_table_schema->get_all_part_num(), ddl_rpc_timeout))) {
+      LOG_WARN("failed to get ddl rpc timeout", KR(ret));
+    } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DROP_INDEX_RPC_FAILED))) {
+      LOG_WARN("ddl sim failure", KR(ret), K(tenant_id_), K(task_id_));
+    } else if (OB_FAIL(root_service_->get_common_rpc_proxy().timeout(ddl_rpc_timeout).drop_index(drop_index_arg, drop_index_res))) {
+      LOG_WARN("drop index failed", KR(ret), K(ddl_rpc_timeout));
+    } else {
+      drop_index_task_submitted_ = true;
+      drop_index_task_id_ = drop_index_res.task_id_;
+      if (OB_FAIL(update_task_message())) {
+        LOG_WARN("fail to update task message", K(ret));
+      }
+      LOG_INFO("success submit drop vec index task", K(ret), K(drop_index_task_id_));
+    }
+  }
+  return ret;
+}
+
+int ObVecIndexBuildTask::wait_drop_index_finish(bool &is_finish) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (ObDDLTaskStatus::FAIL != task_status_) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("task status not match", K(ret), K(task_status_)); + } else if (-1 == drop_index_task_id_) { + is_finish = true; + } else { +
HEAP_VAR(ObDDLErrorMessageTableOperator::ObBuildDDLErrorMessage, error_message) { + const int64_t target_object_id = -1; + int64_t unused_user_msg_len = 0; + ObAddr unused_addr; + if (OB_FAIL(ObDDLErrorMessageTableOperator::get_ddl_error_message( + dst_tenant_id_, + drop_index_task_id_, + target_object_id, + unused_addr, + false /* is_ddl_retry_task */, + *GCTX.sql_proxy_, + error_message, + unused_user_msg_len))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + LOG_INFO("ddl task not finish", K(dst_tenant_id_), K(drop_index_task_id_)); + } else { + LOG_WARN("fail to get ddl error message", K(ret), K(drop_index_task_id_)); + } + } else { + if (error_message.ret_code_ != OB_SUCCESS) { + ret = error_message.ret_code_; + drop_index_task_submitted_ = false; // retry + } else { + is_finish = true; + } + } + } + } + return ret; +} + +int ObVecIndexBuildTask::succ() +{ + return cleanup(); +} + +int ObVecIndexBuildTask::validate_checksum() +{ + int ret = OB_SUCCESS; + bool state_finished = false; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (ObDDLTaskStatus::VALIDATE_CHECKSUM != task_status_) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("task status not match", K(ret), K(task_status_)); + } else { + // TODO @wuxingying: validate checksum, set next status to FAIL if validation failed + if (OB_SUCC(ret)) { + state_finished = true; + } + } + if (state_finished && OB_SUCC(ret)) { + ObDDLTaskStatus next_status; + if (OB_FAIL(get_next_status(next_status))) { + LOG_WARN("failed to get next status", K(ret)); + } else { + (void)switch_status(next_status, true, ret); + LOG_INFO("validate checksum finished", K(ret), K(parent_task_id_), + K(task_id_), K(*this)); + } + } + return ret; +} + +int ObVecIndexBuildTask::cleanup_impl() +{ + int ret = OB_SUCCESS; + ObString unused_str; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(report_error_code(unused_str))) { + 
LOG_WARN("report error code failed", K(ret)); + } else { + const uint64_t data_table_id = object_id_; + const uint64_t index_table_id = index_table_id_; + ObMultiVersionSchemaService &schema_service = root_service_->get_schema_service(); + ObSchemaGetterGuard schema_guard; + const ObTableSchema *data_schema = nullptr; + int64_t refreshed_schema_version = 0; + ObTableLockOwnerID owner_id; + ObMySQLTransaction trans; + if (OB_FAIL(schema_service.get_tenant_schema_guard(tenant_id_, + schema_guard))) { + LOG_WARN("get tenant schema guard failed", K(ret), K(tenant_id_)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, + data_table_id, + data_schema))) { + LOG_WARN("fail to get table schema", K(ret), K(data_table_id)); + } else if (OB_ISNULL(data_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("fail to get table schema", K(ret), KP(data_schema)); + } else if (OB_FAIL(trans.start(&root_service_->get_sql_proxy(), dst_tenant_id_))) { + LOG_WARN("start transaction failed", K(ret)); + } else if (OB_FAIL(owner_id.convert_from_value(ObLockOwnerType::DEFAULT_OWNER_TYPE, + task_id_))) { + LOG_WARN("failed to get owner id", K(ret), K(task_id_)); + } else if (OB_FAIL(ObDDLLock::unlock_for_add_drop_index(*data_schema, + index_table_id, + false, + owner_id, + trans))) { + LOG_WARN("failed to unlock online ddl lock", K(ret)); + } + if (trans.is_started()) { + int tmp_ret = trans.end(true/*commit*/); + if (OB_SUCCESS != tmp_ret) { + LOG_WARN("trans end failed", "is_commit", OB_SUCCESS == ret, K(tmp_ret)); + ret = (OB_SUCCESS == ret) ? 
tmp_ret : ret; + } + } + } + + DEBUG_SYNC(CREATE_INDEX_SUCCESS); + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObDDLTaskRecordOperator::delete_record(root_service_->get_sql_proxy(), + tenant_id_, + task_id_))) { + LOG_WARN("delete task record failed", K(ret), K(task_id_), K(schema_version_)); + } else { + need_retry_ = false; // clean succ, stop the task + } + + if (OB_SUCC(ret) && parent_task_id_ > 0) { + const ObDDLTaskID parent_task_id(tenant_id_, parent_task_id_); + root_service_->get_ddl_task_scheduler().on_ddl_task_finish(parent_task_id, + get_task_key(), + ret_code_, trace_id_); + } + LOG_INFO("clean task finished", K(ret), K(*this)); + return ret; +} + +int ObVecIndexBuildTask::update_task_message() +{ + int ret = OB_SUCCESS; + char *buf = nullptr; + int64_t pos = 0; + ObString msg; + common::ObArenaAllocator allocator("ObVecIndexBuild"); + const int64_t serialize_param_size = get_serialize_param_size(); + + if (OB_ISNULL(buf = static_cast(allocator.alloc(serialize_param_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory", KR(ret), K(serialize_param_size)); + } else if (OB_FAIL(serialize_params_to_message(buf, serialize_param_size, pos))) { + LOG_WARN("failed to serialize params to message", KR(ret)); + } else { + msg.assign(buf, serialize_param_size); + if (OB_FAIL(ObDDLTaskRecordOperator::update_message(root_service_->get_sql_proxy(), tenant_id_, task_id_, msg))) { + LOG_WARN("failed to update message", KR(ret)); + } + } + return ret; +} + +} // end namespace rootserver +} // end namespace oceanbase diff --git a/src/rootserver/ddl_task/ob_vec_index_build_task.h b/src/rootserver/ddl_task/ob_vec_index_build_task.h new file mode 100644 index 0000000000..e6a6839579 --- /dev/null +++ b/src/rootserver/ddl_task/ob_vec_index_build_task.h @@ -0,0 +1,192 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_ROOTSERVER_OB_VEC_INDEX_BUILD_TASK_H_ +#define OCEANBASE_ROOTSERVER_OB_VEC_INDEX_BUILD_TASK_H_ + +#include "share/schema/ob_schema_getter_guard.h" +#include "share/ob_domain_index_builder_util.h" + +namespace oceanbase +{ +namespace rootserver +{ + +class ObVecIndexBuildTask : public ObDDLTask +{ +public: + ObVecIndexBuildTask(); + virtual ~ObVecIndexBuildTask(); + int init( + const uint64_t tenant_id, + const int64_t task_id, + const ObTableSchema *data_table_schema, + const ObTableSchema *index_schema, + const int64_t schema_version, + const int64_t parallelism, + const int64_t consumer_group_id, + const obrpc::ObCreateIndexArg &create_index_arg, + const uint64_t tenant_data_version, + const int64_t parent_task_id = 0, + const int64_t task_status = share::ObDDLTaskStatus::PREPARE, + const int64_t snapshot_version = 0); + int init(const ObDDLTaskRecord &task_record); + virtual int process() override; + virtual int cleanup_impl() override; + virtual bool is_valid() const override; + virtual int collect_longops_stat(share::ObLongopsValue &value) override; + virtual int serialize_params_to_message( + char *buf, + const int64_t buf_size, + int64_t &pos) const override; + virtual int deserialize_params_from_message( + const uint64_t tenant_id, + const char *buf, + const int64_t buf_size, + int64_t &pos) override; + virtual int64_t get_serialize_param_size() const override; + virtual bool support_longops_monitoring() const override { return true; } + virtual int on_child_task_finish( + const uint64_t child_task_key, + const int ret_code) override; + TO_STRING_KV(K(index_table_id_), 
K(rowkey_vid_aux_table_id_), + K(vid_rowkey_aux_table_id_), K(delta_buffer_table_id_), + K(index_id_table_id_), K(index_snapshot_data_table_id_), + K(rowkey_vid_task_submitted_), K(vid_rowkey_task_submitted_), + K(delta_buffer_task_submitted_), K(index_id_task_submitted_), + K(index_snapshot_data_task_submitted_), K(rowkey_vid_task_id_), + K(vid_rowkey_task_id_), K(delta_buffer_task_id_), + K(index_id_task_id_), K(index_snapshot_task_id_), K(drop_index_task_id_), K(is_rebuild_index_), + K(drop_index_task_submitted_), K(schema_version_), K(execution_id_), + K(consumer_group_id_), K(trace_id_), K(parallelism_), K(create_index_arg_)); + +private: + int get_next_status(share::ObDDLTaskStatus &next_status); + int prepare_aux_table(const ObIndexType index_type, + bool &task_submitted, + uint64_t &aux_table_id, + int64_t &task_id); + int construct_create_index_arg(const ObIndexType index_type, + obrpc::ObCreateIndexArg &arg); + int prepare_rowkey_vid_table(); + int prepare_aux_index_tables(); + int prepare_vid_rowkey_table(); + int construct_rowkey_vid_arg(obrpc::ObCreateIndexArg &arg); + int construct_vid_rowkey_arg(obrpc::ObCreateIndexArg &arg); + int construct_delta_buffer_arg(obrpc::ObCreateIndexArg &arg); + int construct_index_id_arg(obrpc::ObCreateIndexArg &arg); + int construct_index_snapshot_data_arg(obrpc::ObCreateIndexArg &arg); + int record_index_table_id( + const obrpc::ObCreateIndexArg *create_index_arg_, + uint64_t &aux_table_id); + int get_index_table_id( + const obrpc::ObCreateIndexArg *create_index_arg, + uint64_t &index_table_id); + int prepare(); + int wait_aux_table_complement(); + int validate_checksum(); + int clean_on_failed(); + int submit_drop_vec_index_task(); + int wait_drop_index_finish(bool &is_finish); + int succ(); + int update_index_status_in_schema( + const ObTableSchema &index_schema, + const ObIndexStatus new_status); + int check_health(); + int check_aux_table_schemas_exist(bool &is_all_exist); + int deep_copy_index_arg( + 
common::ObIAllocator &allocator, + const obrpc::ObCreateIndexArg &source_arg, + obrpc::ObCreateIndexArg &dest_arg); + int print_child_task_ids(char *buf, int64_t len); + int update_task_message(); + +private: + struct ChangeTaskStatusFn final + { + public: + ChangeTaskStatusFn(common::hash::ObHashMap &dependent_task_result_map, const uint64_t tenant_id, ObRootService *root_service) : + dependent_task_result_map_(dependent_task_result_map), + rt_service_(root_service), + dest_tenant_id_(tenant_id) + {} + public: + ~ChangeTaskStatusFn() = default; + int operator() (common::hash::HashMapPair &entry); + public: + common::hash::ObHashMap &dependent_task_result_map_; + ObRootService *rt_service_; + uint64_t dest_tenant_id_; + }; + struct CheckTaskStatusFn final + { + public: + CheckTaskStatusFn(common::hash::ObHashMap &dependent_task_result_map, + int64_t &finished_task_cnt, bool &child_task_failed, bool &state_finished, const uint64_t tenant_id) : + dependent_task_result_map_(dependent_task_result_map), + finished_task_cnt_(finished_task_cnt), + child_task_failed_(child_task_failed), + state_finished_(state_finished), + dest_tenant_id_(tenant_id) + {} + public: + ~CheckTaskStatusFn() = default; + int operator() (common::hash::HashMapPair &entry); + public: + common::hash::ObHashMap &dependent_task_result_map_; + int64_t &finished_task_cnt_; + bool &child_task_failed_; + bool &state_finished_; + uint64_t dest_tenant_id_; + }; + static const int64_t OB_VEC_INDEX_BUILD_TASK_VERSION = 1; + static const int64_t OB_VEC_INDEX_BUILD_CHILD_TASK_NUM = 5; + using ObDDLTask::tenant_id_; + using ObDDLTask::task_id_; + using ObDDLTask::schema_version_; + using ObDDLTask::parallelism_; + using ObDDLTask::consumer_group_id_; + using ObDDLTask::parent_task_id_; + using ObDDLTask::task_status_; + using ObDDLTask::snapshot_version_; + using ObDDLTask::object_id_; + using ObDDLTask::target_object_id_; + using ObDDLTask::is_inited_; + uint64_t &index_table_id_; + uint64_t 
rowkey_vid_aux_table_id_; + uint64_t vid_rowkey_aux_table_id_; + uint64_t delta_buffer_table_id_; + uint64_t index_id_table_id_; + uint64_t index_snapshot_data_table_id_; + bool rowkey_vid_task_submitted_; + bool vid_rowkey_task_submitted_; + bool delta_buffer_task_submitted_; + bool index_id_task_submitted_; + bool index_snapshot_data_task_submitted_; + int64_t rowkey_vid_task_id_; + int64_t vid_rowkey_task_id_; + int64_t delta_buffer_task_id_; + int64_t index_id_task_id_; + int64_t index_snapshot_task_id_; + bool drop_index_task_submitted_; + int64_t drop_index_task_id_; + bool is_rebuild_index_; + ObRootService *root_service_; + ObDDLWaitTransEndCtx wait_trans_ctx_; + obrpc::ObCreateIndexArg create_index_arg_; + common::hash::ObHashMap dependent_task_result_map_; +}; + +} // end namespace rootserver +} // end namespace oceanbase + +#endif /* OCEANBASE_ROOTSERVER_OB_VEC_INDEX_BUILD_TASK_H_*/ diff --git a/src/rootserver/ob_ddl_operator.cpp b/src/rootserver/ob_ddl_operator.cpp index 5c727bc963..100f783afa 100644 --- a/src/rootserver/ob_ddl_operator.cpp +++ b/src/rootserver/ob_ddl_operator.cpp @@ -22,6 +22,7 @@ #include "share/ob_autoincrement_service.h" #include "share/ob_cluster_version.h" #include "share/ob_fts_index_builder_util.h" +#include "share/ob_vec_index_builder_util.h" #include "share/resource_manager/ob_resource_manager_proxy.h" #include "share/schema/ob_schema_service.h" #include "share/schema/ob_schema_getter_guard.h" @@ -3598,28 +3599,56 @@ int ObDDLOperator::alter_table_rename_index( LOG_WARN("fail to alter table rename index", K(ret), K(tenant_id), KPC(index_table_schema), K(new_index_table_name)); } else if (is_fts_index_aux(index_table_schema->get_index_type())) { - if (OB_FAIL(alter_table_rename_built_in_fts_index_(tenant_id, - data_table_id, - database_id, - index_name, - new_index_name, - new_index_status, - schema_guard, - trans, - allocator))) { + if (OB_FAIL(alter_table_rename_built_in_index_(tenant_id, + data_table_id, + database_id, + 
INDEX_TYPE_FTS_DOC_WORD_LOCAL, /* index_type */ + index_name, + new_index_name, + new_index_status, + schema_guard, + trans, + allocator))) { LOG_WARN("failed to rename built in fts index", K(ret), K(tenant_id), K(data_table_id), K(database_id), K(index_name), K(new_index_name)); } + } else if (is_vec_delta_buffer_type(index_table_schema->get_index_type())) { + if (OB_FAIL(alter_table_rename_built_in_index_(tenant_id, + data_table_id, + database_id, + INDEX_TYPE_VEC_INDEX_ID_LOCAL, /* index_type */ + index_name, + new_index_name, + new_index_status, + schema_guard, + trans, + allocator))) { + LOG_WARN("failed to rename built in delta_buffer_table index", K(ret), K(tenant_id), + K(data_table_id), K(database_id), K(index_name), K(new_index_name)); + } else if (OB_FAIL(alter_table_rename_built_in_index_(tenant_id, + data_table_id, + database_id, + INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL, /* index_type */ + index_name, + new_index_name, + new_index_status, + schema_guard, + trans, + allocator))) { + LOG_WARN("failed to rename built in index_snapshot_data_table index", K(ret), K(tenant_id), + K(data_table_id), K(database_id), K(index_name), K(new_index_name)); + } } } } return ret; } -int ObDDLOperator::alter_table_rename_built_in_fts_index_( +int ObDDLOperator::alter_table_rename_built_in_index_( const uint64_t tenant_id, const uint64_t data_table_id, const uint64_t database_id, + const ObIndexType index_type, const ObString &index_name, const ObString &new_index_name, const ObIndexStatus *new_index_status, @@ -3628,49 +3657,65 @@ int ObDDLOperator::alter_table_rename_built_in_fts_index_( ObArenaAllocator &allocator) { int ret = OB_SUCCESS; - SMART_VARS_3((ObTableSchema, new_fts_doc_word_schema), + SMART_VARS_3((ObTableSchema, new_table_schema), (obrpc::ObCreateIndexArg, origin_index_arg), (obrpc::ObCreateIndexArg, new_index_arg)) { - const ObTableSchema *origin_fts_doc_word_schema = NULL; + const ObTableSchema *origin_table_schema = NULL; origin_index_arg.index_name_ 
= index_name; - origin_index_arg.index_type_ = INDEX_TYPE_FTS_DOC_WORD_LOCAL; + origin_index_arg.index_type_ = index_type; new_index_arg.index_name_ = new_index_name; - new_index_arg.index_type_ = INDEX_TYPE_FTS_DOC_WORD_LOCAL; - ObString origin_fts_doc_word_index_table_name; - ObString new_fts_doc_word_index_table_name; - if (OB_FAIL(ObFtsIndexBuilderUtil::generate_fts_aux_index_name(origin_index_arg, &allocator))) { - LOG_WARN("failed to generate origin fts doc word name", K(ret)); - } else if (OB_FAIL(ObFtsIndexBuilderUtil::generate_fts_aux_index_name(new_index_arg, &allocator))) { - LOG_WARN("failed to generate new fts doc word name", K(ret)); + new_index_arg.index_type_ = index_type; + ObString origin_index_table_name; + ObString new_index_table_name; + if (is_fts_index(index_type)) { // fts index + if (OB_FAIL(ObFtsIndexBuilderUtil::generate_fts_aux_index_name(origin_index_arg, &allocator))) { + LOG_WARN("failed to generate origin fts doc word name", K(ret), K(index_type)); + } else if (OB_FAIL(ObFtsIndexBuilderUtil::generate_fts_aux_index_name(new_index_arg, &allocator))) { + LOG_WARN("failed to generate new fts doc word name", K(ret), K(index_type)); + } + } else if (is_vec_index(index_type)) { // vector index + if (OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator, + index_type, + index_name, + origin_index_arg.index_name_))) { + LOG_WARN("failed to generate origin vec index name", K(ret), K(index_type)); + } else if (OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator, + index_type, + new_index_name, + new_index_arg.index_name_))) { + LOG_WARN("failed to generate new vec index name", K(ret), K(index_type)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected index type", K(ret), K(index_type)); + } + + if (OB_FAIL(ret)) { } else if (OB_FAIL(ObTableSchema::build_index_table_name(allocator, data_table_id, origin_index_arg.index_name_, - origin_fts_doc_word_index_table_name))) { - LOG_WARN("failed to build origin 
fts doc word table name", K(ret), - K(data_table_id), K(origin_index_arg.index_name_)); + origin_index_table_name))) { + LOG_WARN("failed to build origin table name", K(ret), K(data_table_id), K(origin_index_arg.index_name_)); } else if (OB_FAIL(ObTableSchema::build_index_table_name(allocator, data_table_id, new_index_arg.index_name_, - new_fts_doc_word_index_table_name))) { - LOG_WARN("failed to build new fts doc word table name", K(ret), - K(data_table_id), K(new_index_arg.index_name_)); + new_index_table_name))) { + LOG_WARN("failed to build new table name", K(ret), K(data_table_id), K(new_index_arg.index_name_)); } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, database_id, - origin_fts_doc_word_index_table_name, + origin_index_table_name, true/*is_index*/, - origin_fts_doc_word_schema, + origin_table_schema, false/*is_hidden*/, true/*is_built_in_index*/))) { LOG_WARN("failed to get origin fts_doc_word schema", K(ret)); } else if (OB_FAIL(inner_alter_table_rename_index_(tenant_id, - origin_fts_doc_word_schema, - new_fts_doc_word_index_table_name, + origin_table_schema, + new_index_table_name, new_index_status, trans, - new_fts_doc_word_schema))) { - LOG_WARN("fail to alter table rename fts doc word index", - K(ret), K(tenant_id), KPC(origin_fts_doc_word_schema), - K(new_fts_doc_word_schema)); + new_table_schema))) { + LOG_WARN("fail to alter table rename index", K(ret), K(tenant_id), KPC(origin_table_schema), K(new_table_schema)); } } return ret; diff --git a/src/rootserver/ob_ddl_operator.h b/src/rootserver/ob_ddl_operator.h index d06cf7e1c9..2a7a362b4f 100644 --- a/src/rootserver/ob_ddl_operator.h +++ b/src/rootserver/ob_ddl_operator.h @@ -1176,10 +1176,11 @@ private: int64_t routine_id); private: - int alter_table_rename_built_in_fts_index_( + int alter_table_rename_built_in_index_( const uint64_t tenant_id, const uint64_t data_table_id, const uint64_t database_id, + const ObIndexType index_type, const ObString &index_name, const ObString 
&new_index_name, const ObIndexStatus *new_index_status, diff --git a/src/rootserver/ob_ddl_service.cpp b/src/rootserver/ob_ddl_service.cpp index dc889307bb..7fe0b9ec87 100755 --- a/src/rootserver/ob_ddl_service.cpp +++ b/src/rootserver/ob_ddl_service.cpp @@ -123,6 +123,8 @@ #include "storage/mview/ob_mview_sched_job_utils.h" #include "rootserver/restore/ob_tenant_clone_util.h" #include "rootserver/mview/ob_mview_dependency_service.h" +#include "src/share/ob_vec_index_builder_util.h" +#include "share/vector_index/ob_vector_index_util.h" #include "rootserver/direct_load/ob_direct_load_partition_exchange.h" namespace oceanbase @@ -5898,6 +5900,115 @@ int ObDDLService::remap_index_tablets_and_take_effect( return ret; } +int ObDDLService::switch_index_name_and_status_for_vec_index_table(obrpc::ObAlterTableArg &alter_table_arg) +{ + int ret = OB_SUCCESS; + + LOG_DEBUG("switch_index_name_and_status_for_vec_index_table", K(alter_table_arg)); + + const int64_t old_index_id = alter_table_arg.table_id_; + const int64_t new_index_id = alter_table_arg.hidden_table_id_; + const int64_t tenant_id = alter_table_arg.alter_table_schema_.get_tenant_id(); + common::ObIAllocator &allocator = alter_table_arg.allocator_; + if (OB_FAIL(check_inner_stat())) { + LOG_WARN("variable is not init", K(ret)); + } else if (old_index_id == OB_INVALID_ID || new_index_id == OB_INVALID_ID + || tenant_id == OB_INVALID_TENANT_ID) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(old_index_id), K(new_index_id), K(tenant_id)); + } else { + ObDDLSQLTransaction trans(schema_service_); + const ObTableSchema *old_table_schema = NULL; + const ObTableSchema *new_table_schema = NULL; + ObSchemaGetterGuard schema_guard; + ObDDLOperator ddl_operator(*schema_service_, *sql_proxy_); + schema_guard.set_session_id(alter_table_arg.session_id_); + int64_t refreshed_schema_version = 0; + if (OB_FAIL(get_tenant_schema_guard_with_version_in_inner_table(tenant_id, schema_guard))) { + LOG_WARN("fail to 
get schema guard with version in inner table", K(ret), K(tenant_id)); + } else if (OB_FAIL(schema_guard.get_schema_version(tenant_id, refreshed_schema_version))) { + LOG_WARN("failed to get tenant schema version", KR(ret), K(tenant_id)); + } else if (OB_FAIL(trans.start(sql_proxy_, tenant_id, refreshed_schema_version))) { + LOG_WARN("fail to start trans, ", KR(ret), K(tenant_id), K(refreshed_schema_version)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, old_index_id, old_table_schema))) { + LOG_WARN("fail to get old index table schema", K(ret), K(old_index_id)); + } else if (OB_ISNULL(old_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpectd null pointer", K(ret)); + } else if (!old_table_schema->is_vec_index() || !old_table_schema->is_vec_delta_buffer_type()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("old index table must be vec delta_buffer_table", K(ret), K(old_table_schema)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, new_index_id, new_table_schema))) { + LOG_WARN("fail to get new index table schema", K(ret), K(new_index_id)); + } else if (OB_ISNULL(new_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpectd null pointer", K(ret)); + } else if (!new_table_schema->is_vec_index() || !new_table_schema->is_vec_delta_buffer_type()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("new index table must be vec delta_buffer_table", K(ret), K(new_table_schema)); + } else if (old_table_schema->get_database_id() != new_table_schema->get_database_id()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("old table and new table should in same database", K(ret)); + } else if (old_table_schema->get_data_table_id() != new_table_schema->get_data_table_id()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("old table and new table should in same data table", K(ret)); + } else { + const int64_t database_id = old_table_schema->get_database_id(); + const int64_t data_table_id = old_table_schema->get_data_table_id(); + const bool is_index = true; + 
ObString old_domain_index_name = old_table_schema->get_table_name_str(); + ObString new_domain_index_name = new_table_schema->get_table_name_str(); + ObArray new_table_names; + ObArray old_table_names; + + SMART_VAR(ObSArray, table_schemas) { + // ObSArray table_schemas; + const int64_t EXPECTED_UPDATE_TABLE_CNT = 7; // 3 old index table + 3 new index table + 1 data data + if (OB_FAIL(ObVectorIndexUtil::generate_switch_index_names(old_domain_index_name, + new_domain_index_name, + allocator, + old_table_names, + new_table_names))) { + LOG_WARN("fail to generate switch index names"); + } else if (OB_FAIL(ObVectorIndexUtil::update_index_tables_status(tenant_id, + database_id, + old_table_names, + new_table_names, + ddl_operator, + schema_guard, + trans, + table_schemas))) { + LOG_WARN("fail to update index table status", K(ret), K(tenant_id)); + } else if (OB_FAIL(ObVectorIndexUtil::update_index_tables_attributes(tenant_id, + database_id, + data_table_id, + EXPECTED_UPDATE_TABLE_CNT, + old_table_names, + new_table_names, + ddl_operator, + schema_guard, + trans, + table_schemas))) { + LOG_WARN("fail to update index table attribute", K(ret), K(tenant_id)); + } + } // end smart_var table_schemas + } + if (trans.is_started()) { + int temp_ret = OB_SUCCESS; + if (OB_SUCCESS != (temp_ret = trans.end(OB_SUCC(ret)))) { + LOG_WARN("trans end failed", "is_commit", OB_SUCCESS == ret, K(temp_ret)); + ret = (OB_SUCC(ret)) ? 
temp_ret : ret; + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(publish_schema(tenant_id))) { + LOG_WARN("publish_schema failed", K(ret)); + } + return ret; +} + int ObDDLService::update_autoinc_schema(obrpc::ObAlterTableArg &alter_table_arg) { int ret = OB_SUCCESS; @@ -6182,31 +6293,32 @@ int ObDDLService::lock_tables_of_database(const ObDatabaseSchema &database_schem return ret; } -int ObDDLService::check_schema_generated_for_aux_index_schema_( - const obrpc::ObGenerateAuxIndexSchemaArg &arg, +int ObDDLService::check_aux_index_schema_exist_( + const uint64_t tenant_id, + const obrpc::ObCreateIndexArg &arg, ObSchemaGetterGuard &schema_guard, const ObTableSchema *data_schema, - bool &schema_generated, - uint64_t &aux_index_table_id) + bool &is_exist, + const ObTableSchema *&index_schema) { int ret = OB_SUCCESS; - schema_generated = false; - aux_index_table_id = OB_INVALID_ID; + is_exist = false; + index_schema = nullptr; ObArenaAllocator allocator(ObModIds::OB_SCHEMA); - const uint64_t tenant_id = arg.tenant_id_; - ObIndexType index_type = arg.create_index_arg_.index_type_; + ObIndexType index_type = arg.index_type_; ObString index_table_name; - if (OB_ISNULL(data_schema)) { + if (tenant_id == OB_INVALID_ID || + !arg.is_valid() || + OB_ISNULL(data_schema)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), KPC(data_schema)); + LOG_WARN("invalid argument", K(ret), K(tenant_id), K(arg), KPC(data_schema)); } else if (OB_FAIL(ObTableSchema::build_index_table_name(allocator, data_schema->get_table_id(), - arg.create_index_arg_.index_name_, + arg.index_name_, index_table_name))) { LOG_WARN("failed to construct index table name", K(ret), - K(arg.create_index_arg_.index_name_)); + K(arg.index_name_)); } else if (share::schema::is_fts_index(index_type)) { - const ObTableSchema *index_schema = nullptr; if (OB_FAIL(schema_guard.get_table_schema(tenant_id, data_schema->get_database_id(), index_table_name, @@ -6214,13 +6326,38 @@ int 
ObDDLService::check_schema_generated_for_aux_index_schema_( index_schema, false/*with_hidden_flag*/, share::schema::is_built_in_fts_index(index_type)))) { - LOG_WARN("failed to get index schema", K(ret), K(tenant_id), K(index_table_name)); + if (OB_TABLE_NOT_EXIST == ret) { + is_exist = false; + index_schema = nullptr; + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get index schema", K(ret), K(tenant_id), K(index_table_name)); + } } else if (OB_NOT_NULL(index_schema)) { - schema_generated = true; - aux_index_table_id = index_schema->get_table_id(); + is_exist = true; LOG_INFO("fts index aux table already exist, no need to generate", K(index_table_name)); } + } else if (share::schema::is_vec_index(index_type)) { + if (OB_FAIL(schema_guard.get_table_schema(tenant_id, + data_schema->get_database_id(), + index_table_name, + true/*is_index*/, + index_schema, + false/*with_hidden_flag*/, + share::schema::is_built_in_vec_index(index_type)))) { + if (OB_TABLE_NOT_EXIST == ret) { + is_exist = false; + index_schema = nullptr; + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get index schema", K(ret), K(tenant_id), K(index_table_name)); + } + } else if (OB_NOT_NULL(index_schema)) { + is_exist = true; + LOG_INFO("vec index aux table already exist, no need to generate", + K(index_table_name)); + } } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected index type", K(ret), K(index_type)); @@ -6228,25 +6365,155 @@ int ObDDLService::check_schema_generated_for_aux_index_schema_( return ret; } -int ObDDLService::generate_aux_index_schema( - const obrpc::ObGenerateAuxIndexSchemaArg &arg, - obrpc::ObGenerateAuxIndexSchemaRes &result) +int ObDDLService::generate_aux_index_schema_( + const uint64_t tenant_id, + ObSchemaGetterGuard &schema_guard, + ObCreateIndexArg &create_index_arg, + ObTableSchema &nonconst_data_schema, + const ObTableSchema *data_schema, + ObIArray &gen_columns, + ObDDLSQLTransaction &trans, + const uint64_t tenant_data_version, + ObTableSchema 
&index_schema) +{ + int ret = OB_SUCCESS; + if (!create_index_arg.is_valid() || OB_INVALID_TENANT_ID == tenant_id || + OB_ISNULL(data_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(create_index_arg), KPC(data_schema)); + } else { + ObIndexBuilder index_builder(*this); + const bool global_index_without_column_info = true; + if (create_index_arg.is_rebuild_index_) { + if (OB_FAIL(ObVectorIndexUtil::generate_index_schema_from_exist_table(tenant_id, + schema_guard, + *this, + create_index_arg, + *data_schema, + index_schema))) { + LOG_WARN("fail to generate index schema from exist table", K(ret), K(tenant_id), K(create_index_arg)); + } + } + if (OB_FAIL(ret)) { + } else if (!create_index_arg.is_rebuild_index_ && + OB_FAIL(index_builder.generate_schema(create_index_arg, + nonconst_data_schema, + global_index_without_column_info, + true/*generate_id*/, + index_schema))) { + LOG_WARN("fail to generate schema", K(ret), K(create_index_arg)); + } else if (OB_FAIL(nonconst_data_schema.check_create_index_on_hidden_primary_key(index_schema))) { + LOG_WARN("failed to check create index on table", K(ret), K(index_schema)); + } else if (gen_columns.empty()) { + if (OB_FAIL(create_index_table(create_index_arg, + tenant_data_version, + index_schema, + trans))) { + LOG_WARN("fail to create index", K(ret), K(index_schema)); + } + } else { + if (OB_FAIL(create_inner_expr_index(trans, + *data_schema, + tenant_data_version, + nonconst_data_schema, + gen_columns, + index_schema))) { + LOG_WARN("fail to create inner expr index", K(ret)); + } + } + } + return ret; +} + +int ObDDLService::create_aux_index_task_( + const ObTableSchema *data_schema, + const ObTableSchema *idx_schema, + ObCreateIndexArg &create_index_arg, + ObArenaAllocator &allocator, + const int64_t parent_task_id, + const uint64_t tenant_data_version, + ObDDLTaskRecord &task_record) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(data_schema) || + OB_ISNULL(idx_schema) || + 
OB_ISNULL(GCTX.root_service_) || + OB_ISNULL(GCTX.sql_proxy_) || + !create_index_arg.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KPC(data_schema), KPC(idx_schema), + KP(GCTX.root_service_), KP(GCTX.sql_proxy_), K(create_index_arg)); + } else { + bool need_partitioned = ((DATA_VERSION_4_2_2_0 <= tenant_data_version && + tenant_data_version < DATA_VERSION_4_3_0_0) || + tenant_data_version >= DATA_VERSION_4_3_2_0) && + idx_schema->is_storage_local_index_table() && + idx_schema->is_partitioned_table(); + ObCreateDDLTaskParam param(data_schema->get_tenant_id(), + need_partitioned ? ObDDLType::DDL_CREATE_PARTITIONED_LOCAL_INDEX : ObDDLType::DDL_CREATE_INDEX, + data_schema, + idx_schema, + 0/*object_id*/, + idx_schema->get_schema_version(), + create_index_arg.parallelism_, + create_index_arg.consumer_group_id_, + &allocator, + &create_index_arg, + parent_task_id); + param.tenant_data_version_ = tenant_data_version; + if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler(). 
+ create_ddl_task(param, *GCTX.sql_proxy_, task_record))) { + if (OB_ENTRY_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("submit create index ddl task failed", K(ret)); + } + } + } + return ret; +} + +int ObDDLService::create_aux_index( + const obrpc::ObCreateAuxIndexArg &arg, + obrpc::ObCreateAuxIndexRes &result) { int ret = OB_SUCCESS; const uint64_t tenant_id = arg.tenant_id_; const uint64_t data_table_id = arg.data_table_id_; + int64_t index_status = ObIndexStatus::INDEX_STATUS_NOT_FOUND; + ObArenaAllocator allocator(lib::ObLabel("DdlTaskTmp")); SMART_VARS_2((obrpc::ObCreateIndexArg, create_index_arg), (ObTableSchema, nonconst_data_schema)) { + ObDDLSQLTransaction trans(&get_schema_service()); + int64_t refreshed_schema_version = 0; + uint64_t tenant_data_version = 0; + bool schema_already_exist = false; + uint64_t index_table_id = OB_INVALID_ID; ObSchemaGetterGuard schema_guard; const ObTableSchema *data_schema = nullptr; + const ObTableSchema *idx_schema = nullptr; ObSEArray gen_columns; ObArenaAllocator allocator(lib::ObLabel("DdlTaskTmp")); + ObDDLTaskRecord task_record; if (!arg.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(arg)); + } else if (OB_ISNULL(GCTX.root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("root service is nullptr", K(ret)); } else if (OB_FAIL(get_tenant_schema_guard_with_version_in_inner_table(tenant_id, schema_guard))) { LOG_WARN("get schema guard failed", K(ret)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(tenant_id)); + } else if (OB_FAIL(schema_guard.get_schema_version(tenant_id, + refreshed_schema_version))) { + LOG_WARN("failed to get tenant schema version", KR(ret), K(tenant_id)); + } else if (OB_FAIL(trans.start(&get_sql_proxy(), + tenant_id, + refreshed_schema_version))) { + LOG_WARN("start transaction failed", KR(ret), K(tenant_id), + K(refreshed_schema_version)); } else if 
(OB_FAIL(schema_guard.get_table_schema(tenant_id, data_table_id, data_schema))) { @@ -6258,81 +6525,94 @@ int ObDDLService::generate_aux_index_schema( LOG_WARN("failed to assign to nonconst data schema", K(ret)); } else if (OB_FAIL(create_index_arg.assign(arg.create_index_arg_))) { LOG_WARN("fail to assign create index arg", K(ret)); - } else if (OB_FAIL(ObFtsIndexBuilderUtil::adjust_fts_args(create_index_arg, + } else if (share::schema::is_fts_index(create_index_arg.index_type_) + && OB_FAIL(ObFtsIndexBuilderUtil::adjust_fts_args(create_index_arg, nonconst_data_schema, allocator, gen_columns))) { LOG_WARN("fail to adjust expr index args", K(ret)); - } else { - if (OB_FAIL(check_schema_generated_for_aux_index_schema_(arg, - schema_guard, - data_schema, - result.schema_generated_, - result.aux_table_id_) )) { - LOG_WARN("failed to check if schema is generated for aux index table", K(ret), K(arg)); - } else if (result.schema_generated_) { - // do nothing - } else { - ObIndexBuilder index_builder(*this); - ObDDLSQLTransaction trans(&get_schema_service()); - const bool global_index_without_column_info = true; - int64_t refreshed_schema_version = 0; - uint64_t tenant_data_version = 0; - ObTableSchema index_schema; - if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { - LOG_WARN("get min data version failed", K(ret), K(tenant_id)); - } else if (OB_FAIL(schema_guard.get_schema_version(tenant_id, - refreshed_schema_version))) { - LOG_WARN("failed to get tenant schema version", KR(ret), K(tenant_id)); - } else if (OB_FAIL(trans.start(&get_sql_proxy(), - tenant_id, - refreshed_schema_version))) { - LOG_WARN("start transaction failed", KR(ret), K(tenant_id), - K(refreshed_schema_version)); - } else if (OB_FAIL(index_builder.generate_schema(create_index_arg, - nonconst_data_schema, - global_index_without_column_info, - true/*generate_id*/, - index_schema))) { - LOG_WARN("fail to generate schema", K(ret), K(create_index_arg)); - } else if 
(OB_FAIL(nonconst_data_schema.check_create_index_on_hidden_primary_key(index_schema))) { - LOG_WARN("failed to check create index on table", K(ret), K(index_schema)); - } else if (gen_columns.empty()) { - if (OB_FAIL(create_index_table(create_index_arg, - tenant_data_version, - index_schema, - trans))) { - LOG_WARN("fail to create index", K(ret), K(index_schema)); - } - } else { - if (OB_FAIL(create_inner_expr_index(trans, - *data_schema, - tenant_data_version, - nonconst_data_schema, - gen_columns, - index_schema))) { - LOG_WARN("fail to create inner expr index", K(ret)); - } - } - if (trans.is_started()) { - int temp_ret = OB_SUCCESS; - if (OB_SUCCESS != (temp_ret = trans.end(OB_SUCC(ret)))) { - LOG_WARN("trans end failed", "is_commit", OB_SUCCESS == ret, K(temp_ret)); - ret = (OB_SUCC(ret)) ? temp_ret : ret; - } - } - if (OB_SUCC(ret)) { - if (OB_FAIL(publish_schema(tenant_id))) { - LOG_WARN("fail to publish schema", K(ret), K(tenant_id)); - } else { - result.schema_generated_ = true; - result.aux_table_id_ = index_schema.get_table_id(); - } + } else if (!create_index_arg.is_rebuild_index_ + && share::schema::is_vec_index(create_index_arg.index_type_) + && OB_FAIL(ObVecIndexBuilderUtil::adjust_vec_args(create_index_arg, + nonconst_data_schema, + allocator, + gen_columns))) { + LOG_WARN("fail to adjust expr index args", K(ret)); + } else if (OB_FAIL(check_aux_index_schema_exist_(tenant_id, + arg.create_index_arg_, + schema_guard, + data_schema, + schema_already_exist, + idx_schema))) { + LOG_WARN("failed to check if schema is generated for aux index table", K(ret), K(arg)); + } else if (schema_already_exist) { + if (OB_ISNULL(idx_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("idx_schema is nullptr", K(ret)); + } else if (FALSE_IT(index_table_id = idx_schema->get_table_id())) { + } else if (FALSE_IT(index_status = idx_schema->get_index_status())) { + } else if (ObIndexStatus::INDEX_STATUS_AVAILABLE == index_status) { + // 1. 
index schema exists && index available, fetch index table id + result.schema_generated_ = true; + result.aux_table_id_ = index_table_id; + result.ddl_task_id_ = OB_INVALID_ID; // no need to wait task + } else { // 2. index schema exists && not available, create ddl task + result.schema_generated_ = true; + result.aux_table_id_ = index_table_id; + if (OB_FAIL(create_aux_index_task_(data_schema, + idx_schema, + create_index_arg, + allocator, + arg.task_id_/*parent fts*/, + tenant_data_version, + task_record))) { + LOG_WARN("failed to create aux index ddl task", K(ret), K(create_index_arg)); + } else if (FALSE_IT(result.ddl_task_id_ = task_record.task_id_)) { } } + } else { // 3. index scheme not exist, generate schema && create ddl task + ObTableSchema index_schema; + if (OB_FAIL(generate_aux_index_schema_(tenant_id, + schema_guard, + create_index_arg, + nonconst_data_schema, + data_schema, + gen_columns, + trans, + tenant_data_version, + index_schema))) { + LOG_WARN("failed to generate aux index schema", K(ret), K(create_index_arg)); + } else if (FALSE_IT(result.schema_generated_ = true)) { + } else if (FALSE_IT(result.aux_table_id_ = index_schema.get_table_id())) { + } else if (OB_FAIL(create_aux_index_task_(data_schema, + &index_schema, + create_index_arg, + allocator, + arg.task_id_/*parent fts*/, + tenant_data_version, + task_record))) { + LOG_WARN("failed to create aux index ddl task", K(ret), K(create_index_arg)); + } else if (FALSE_IT(result.ddl_task_id_ = task_record.task_id_)) { + } + } + if (trans.is_started()) { + int temp_ret = OB_SUCCESS; + if (OB_SUCCESS != (temp_ret = trans.end(OB_SUCC(ret)))) { + LOG_WARN("trans end failed", "is_commit", OB_SUCCESS == ret, K(temp_ret)); + ret = (OB_SUCC(ret)) ? 
temp_ret : ret; + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(publish_schema(tenant_id))) { + LOG_WARN("fail to publish schema", K(ret), K(tenant_id)); + } else if (OB_INVALID_ID == result.ddl_task_id_) { // no need to schedule + } else if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler(). + schedule_ddl_task(task_record))) { + LOG_WARN("fail to schedule ddl task", K(ret), K(task_record)); + } } } - LOG_INFO("finish generate aux index schema", K(ret), K(arg), K(result), "ddl_event_info", ObDDLEventInfo()); + LOG_INFO("finish create aux index", K(ret), K(arg), K(result), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -6733,7 +7013,7 @@ int ObDDLService::alter_table_index(obrpc::ObAlterTableArg &alter_table_arg, typedef common::ObSEArray TableSchemaArray; SMART_VAR(TableSchemaArray, new_index_schemas) { if (!drop_index_arg->is_inner_ && !index_table_schema->can_read_index() && OB_FAIL(ObDDLTaskRecordOperator::check_has_index_or_mlog_task( - trans, origin_table_schema.get_tenant_id(), origin_table_schema.get_table_id(), index_table_schema->get_table_id(), has_index_task))) { + trans, *index_table_schema, origin_table_schema.get_tenant_id(), origin_table_schema.get_table_id(), has_index_task))) { LOG_WARN("failed to check ddl conflict", K(ret)); } else if (has_index_task) { ret = OB_NOT_SUPPORTED; @@ -7702,6 +7982,15 @@ int ObDDLService::rename_dropping_index_name( } else if (OB_FAIL(new_index_schemas.push_back(*index_table_schema))) { LOG_WARN("fail to push back index schema", K(ret), KPC(index_table_schema)); } + } else if ((!drop_index_arg.is_inner_ || drop_index_arg.is_vec_inner_drop_)&& index_table_schema->is_vec_delta_buffer_type()) { + // This task is the parent task of drop vec index, no need to rename. 
+ if (OB_FAIL(get_dropping_vec_index_invisiable_table_schema_(index_table_schema->get_tenant_id(), data_table_id, + index_table_schema->get_table_id(), drop_index_arg.is_vec_inner_drop_, index_table_schema->get_table_name_str(), schema_guard, ddl_operator, + trans, new_index_schemas))) { + LOG_WARN("fail to get dropping vec index table schema", K(ret), K(data_table_id), K(index_table_schema)); + } else if (OB_FAIL(new_index_schemas.push_back(*index_table_schema))) { + LOG_WARN("fail to push back index schema", K(ret), KPC(index_table_schema)); + } } else if ((nwrite = snprintf(buf, buf_size, "%s_%lu", "DELETING", ObTimeUtility::current_time())) >= buf_size || nwrite < 0) { ret = common::OB_BUF_NOT_ENOUGH; @@ -7817,6 +8106,118 @@ int ObDDLService::get_dropping_domain_index_invisiable_aux_table_schema( return ret; } +int ObDDLService::get_dropping_vec_index_invisiable_table_schema_( + const uint64_t tenant_id, + const uint64_t data_table_id, + const uint64_t index_table_id, + const bool is_vec_inner_drop, + const ObString &index_name, + share::schema::ObSchemaGetterGuard &schema_guard, + ObDDLOperator &ddl_operator, + common::ObMySQLTransaction &trans, + common::ObIArray &new_aux_schemas) +{ + int ret = OB_SUCCESS; + const share::schema::ObTableSchema *data_table_schema = nullptr; + ObSEArray indexs; + if (OB_UNLIKELY(OB_INVALID_ID == data_table_id + || OB_INVALID_ID == index_table_id + || OB_INVALID_TENANT_ID == tenant_id + || index_name.empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(data_table_id), K(index_table_id), K(tenant_id), K(index_name)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, data_table_id, data_table_schema))) { + LOG_WARN("fail to get index schema with data table id", K(ret), K(tenant_id), K(data_table_id)); + } else if (OB_ISNULL(data_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, data table schema is nullptr", K(ret), KP(data_table_schema)); + } else { + 
SMART_VAR(ObTableSchema, new_aux_schema) { + const ObIArray &indexs = data_table_schema->get_simple_index_infos(); + const share::schema::ObTableSchema *index_id_schema = nullptr; + const share::schema::ObTableSchema *snapshot_data_schema = nullptr; + const share::schema::ObTableSchema *rowkey_vid_schema = nullptr; + const share::schema::ObTableSchema *vid_rowkey_schema = nullptr; + ObArenaAllocator allocator(ObModIds::OB_SCHEMA); + bool is_index = true; + const int64_t database_id = data_table_schema->get_database_id(); + const bool is_hidden_flag = false; + const bool is_built_in_flag = true; + bool already_get_index_id_schema = false; + bool already_get_snapshot_data_table_schema = false; + + for (int64_t i = 0; OB_SUCC(ret) && i < indexs.count(); ++i) { + const share::schema::ObAuxTableMetaInfo &info = indexs.at(i); + if (share::schema::is_vec_rowkey_vid_type(info.index_type_)) { + if (OB_FAIL(schema_guard.get_table_schema(tenant_id, info.table_id_, rowkey_vid_schema))) { + LOG_WARN("fail to get vec rowkey vid table schema", K(ret), K(tenant_id), K(info)); + } else if (OB_ISNULL(rowkey_vid_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("rowkey_vid_schema is nullptr", K(ret), K(info)); + } else if (OB_FAIL(new_aux_schemas.push_back(*rowkey_vid_schema))) { + LOG_WARN("fail to push vec rowkey vid table schema", K(ret), KPC(rowkey_vid_schema)); + } + } else if (share::schema::is_vec_vid_rowkey_type(info.index_type_)) { + if (OB_FAIL(schema_guard.get_table_schema(tenant_id, info.table_id_, vid_rowkey_schema))) { + LOG_WARN("fail to get vec vid rowkey table schema", K(ret), K(tenant_id), K(info)); + } else if (OB_ISNULL(vid_rowkey_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("vid_rowkey_schema is nullptr", K(ret), K(info)); + } else if (OB_FAIL(new_aux_schemas.push_back(*vid_rowkey_schema))) { + LOG_WARN("fail to push vec vid rowkey table schema", K(ret), KPC(vid_rowkey_schema)); + } + } else if (share::schema::is_vec_index_id_type(info.index_type_)) { + // 
通过索引名获取4号表 + if (already_get_index_id_schema) { + // 可能存在多个schema,但这里只取包含index_name字串的schema + } else if (OB_FAIL(ObVecIndexBuilderUtil::get_vec_table_schema_by_name(schema_guard, + tenant_id, + database_id, + index_name, + info.index_type_, + &allocator, + index_id_schema))) { + LOG_WARN("fail to generate vec index name", K(ret), K(info.index_type_)); + } else if (OB_ISNULL(index_id_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("index_id_schema is nullptr", K(ret), K(index_name)); + } else if (OB_FAIL(new_aux_schemas.push_back(*index_id_schema))) { + LOG_WARN("fail to push vec table schema", K(ret), K(index_name)); + } else { + already_get_index_id_schema = true; + } + } else if (share::schema::is_vec_index_snapshot_data_type(info.index_type_)) { + // 通过索引名获取5号表 + if (already_get_snapshot_data_table_schema) { + // 可能存在多个 index id schema,但这里只取包含index_name字串的schema + } else if (OB_FAIL(ObVecIndexBuilderUtil::get_vec_table_schema_by_name(schema_guard, + tenant_id, + database_id, + index_name, + info.index_type_, + &allocator, + snapshot_data_schema))) { + LOG_WARN("fail to generate vec index name", K(ret), K(info.index_type_)); + } else if (OB_ISNULL(snapshot_data_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("snapshot_data_schema is nullptr", K(ret), K(index_name)); + } else if (OB_FAIL(new_aux_schemas.push_back(*snapshot_data_schema))) { + LOG_WARN("fail to push vec table schema", K(ret), K(index_name)); + } else { + already_get_snapshot_data_table_schema = true; + } + } + if (OB_TABLE_NOT_EXIST == ret && is_vec_inner_drop) { + ret = OB_SUCCESS; + LOG_WARN("table is not exist, maybe index table have been drop already", K(ret)); + } + } + } + LOG_INFO("get dropping vec aux table name", K(ret), K(tenant_id), K(data_table_id), K(index_table_id)); + } + return ret; +} + int ObDDLService::generate_tmp_idx_schemas( const ObTableSchema &new_table_schema, ObIArray &idx_schemas, @@ -8239,7 +8640,9 @@ int 
ObDDLService::modify_generated_column_default_value(ObColumnSchemaV2 &genera ObString col_def; ObArenaAllocator allocator(ObModIds::OB_SCHEMA); ObRawExprFactory expr_factory(allocator); - SMART_VAR(ObSQLSessionInfo, default_session) { + SMART_VARS_3((ObSQLSessionInfo, default_session), (ObExecContext, exec_ctx, allocator), + (ObPhysicalPlanCtx, phy_plan_ctx, allocator)) { + LinkExecCtxGuard link_guard(default_session, exec_ctx); uint64_t tenant_id = table_schema.get_tenant_id(); const ObTenantSchema *tenant_schema = NULL; ObSchemaGetterGuard schema_guard; @@ -8260,6 +8663,8 @@ int ObDDLService::modify_generated_column_default_value(ObColumnSchemaV2 &genera LOG_WARN("session load default configs failed", K(ret)); } else if (OB_FAIL(generated_column.get_cur_default_value().get_string(col_def))) { LOG_WARN("get cur default value failed", K(ret)); + } else if (FALSE_IT(exec_ctx.set_physical_plan_ctx(&phy_plan_ctx))) { + } else if (FALSE_IT(exec_ctx.set_my_session(&default_session))) { } else if (OB_FAIL(ObRawExprUtils::build_generated_column_expr(NULL, col_def, expr_factory, @@ -8295,6 +8700,7 @@ int ObDDLService::modify_generated_column_default_value(ObColumnSchemaV2 &genera } } } + exec_ctx.set_physical_plan_ctx(NULL); } } return ret; @@ -8428,7 +8834,7 @@ int ObDDLService::modify_depend_column_type(sql::ObRawExpr *expr, LOG_WARN("extract column expr info failed", K(ret)); } } - if (OB_SUCC(ret) && column_schema.is_enum_or_set()) { + if (OB_SUCC(ret) && (column_schema.is_enum_or_set() || column_schema.is_collection())) { if (OB_FAIL(column_expr->set_enum_set_values(column_schema.get_extended_type_info()))) { LOG_WARN("failed to set enum set values", K(ret)); } @@ -13129,7 +13535,8 @@ int ObDDLService::alter_table_in_trans(obrpc::ObAlterTableArg &alter_table_arg, ObTableSchema &index_schema = create_index_arg->index_schema_; if (INDEX_TYPE_PRIMARY == create_index_arg->index_type_ || is_fts_index(create_index_arg->index_type_) || - 
is_multivalue_index(create_index_arg->index_type_)) { + is_multivalue_index(create_index_arg->index_type_) || + is_vec_index(create_index_arg->index_type_)) { // TODO hanxuan tempory bypass sumbit build fulltext index task // TODO yunyi tempory bypass sumbit build multi value index task } else { @@ -13470,7 +13877,7 @@ int ObDDLService::check_is_offline_ddl(ObAlterTableArg &alter_table_arg, } if (OB_SUCC(ret) && is_double_table_long_running_ddl(ddl_type)) { bool has_index_operation = false; - bool has_fts_or_multivalue_index = false; + bool has_fts_or_multivalue_or_vec_index = false; bool is_adding_constraint = false; bool is_column_store = false; uint64_t table_id = alter_table_arg.alter_table_schema_.get_table_id(); @@ -13490,16 +13897,16 @@ int ObDDLService::check_is_offline_ddl(ObAlterTableArg &alter_table_arg, } else if (OB_FAIL(check_has_domain_index(schema_guard, tenant_id, table_id, - has_fts_or_multivalue_index))) { + has_fts_or_multivalue_or_vec_index))) { LOG_WARN("check has fts index failed", K(ret)); } else if (OB_FAIL(check_is_adding_constraint(tenant_id, table_id, is_adding_constraint))) { LOG_WARN("failed to call check_is_adding_constraint", K(ret)); } else if (has_index_operation) { ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "The DDL cannot be run concurrently with creating index."); - } else if (has_fts_or_multivalue_index) { + } else if (has_fts_or_multivalue_or_vec_index) { ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "Run this DDL operation on table with fulltext search index or multivalue index."); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "Run this DDL operation on table with fulltext/multivalue/vector index."); } else if (is_adding_constraint) { ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "The DDL cannot be run concurrently with adding constraint."); @@ -13513,10 +13920,10 @@ int ObDDLService::check_has_domain_index( ObSchemaGetterGuard &schema_guard, const uint64_t tenant_id, const uint64_t data_table_id, - 
bool &fts_exist) + bool &domain_index_exist) { int ret = OB_SUCCESS; - fts_exist = false; + domain_index_exist = false; ObRootService *root_service = GCTX.root_service_; const ObTableSchema *table_schema = nullptr; if (OB_ISNULL(root_service)) { @@ -13533,8 +13940,10 @@ int ObDDLService::check_has_domain_index( if (index_infos.count() > 0) { // if there is indexes in new tables, if so, the indexes is already rebuilt in new table for (int64_t i = 0; OB_SUCC(ret) && i < index_infos.count(); ++i) { - if (share::schema::is_doc_rowkey_aux(index_infos.at(i).index_type_)) { - fts_exist = true; + if (share::schema::is_doc_rowkey_aux(index_infos.at(i).index_type_) || + share::schema::is_vec_vid_rowkey_type(index_infos.at(i).index_type_) || + share::schema::is_vec_rowkey_vid_type(index_infos.at(i).index_type_)) { + domain_index_exist = true; break; } } @@ -15086,7 +15495,7 @@ int ObDDLService::check_alter_partitions(const ObTableSchema &orig_table_schema, LOG_WARN("split partition in 4.0 not allowed", K(ret), K(tablegroup_id)); LOG_USER_ERROR(OB_OP_NOT_ALLOW, "split partition in 4.0"); } - bool has_fts_or_multivalue_index = false; + bool has_fts_or_multivalue_or_vec_index = false; const int64_t table_id = orig_table_schema.get_table_id(); if (OB_FAIL(ret) || alter_part_type == obrpc::ObAlterTableArg::DROP_PARTITION || @@ -15094,12 +15503,12 @@ int ObDDLService::check_alter_partitions(const ObTableSchema &orig_table_schema, } else if (OB_FAIL(check_has_domain_index(schema_guard, tenant_id, table_id, - has_fts_or_multivalue_index))) { + has_fts_or_multivalue_or_vec_index))) { LOG_WARN("failed to check if have fts index", K(ret), K(table_id)); - } else if (has_fts_or_multivalue_index) { + } else if (has_fts_or_multivalue_or_vec_index) { ret = OB_NOT_SUPPORTED; - LOG_WARN("alter partition operation on table with fulltext or multivalue index not supported", K(ret), K(orig_table_schema)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "alter partition operation on table with fulltext or 
multivalue index"); + LOG_WARN("alter partition operation on table with fulltext/multivalue/vector index not supported", K(ret), K(orig_table_schema)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "alter partition operation on table with fulltext/multivalue/vector index"); } if (OB_FAIL(ret)) { @@ -24657,7 +25066,8 @@ int ObDDLService::drop_table(const ObDropTableArg &drop_table_arg, const obrpc:: drop_table_arg.task_id_))) { LOG_WARN("failed to get owner id", K(ret), K(drop_table_arg.task_id_)); } else if (OB_FAIL(ObDDLLock::unlock_for_add_drop_index(*data_table_schema, - tmp_table_schema, + tmp_table_schema.get_table_id(), + tmp_table_schema.is_global_index_table(), owner_id, trans))) { LOG_WARN("failed to unlock for add drop index", K(ret)); @@ -24780,6 +25190,124 @@ int ObDDLService::drop_table(const ObDropTableArg &drop_table_arg, const obrpc:: return ret; } +int ObDDLService::rebuild_vec_index(const ObRebuildIndexArg &arg, obrpc::ObAlterTableRes &res) +{ + int ret = OB_SUCCESS; + LOG_DEBUG("RS start to rebuild vec index", K(arg)); + + if (OB_FAIL(check_inner_stat())) { + ret = OB_INNER_STAT_ERROR; + LOG_WARN("check_inner_stat error", K(ret), K(is_inited())); + } else if (!arg.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(arg)); + } else { + ObSchemaGetterGuard schema_guard; + schema_guard.set_session_id(arg.session_id_); + bool is_db_in_recyclebin = false; + int64_t refreshed_schema_version = 0; + const bool is_index = false; + const uint64_t tenant_id = arg.tenant_id_; + const ObTableSchema *table_schema = NULL; + ObArenaAllocator allocator(ObModIds::OB_SCHEMA); + + if (OB_FAIL(get_tenant_schema_guard_with_version_in_inner_table(tenant_id, schema_guard))) { + LOG_WARN("get_schema_guard failed", K(ret), K(tenant_id)); + } else if (OB_FAIL(schema_guard.get_schema_version(tenant_id, refreshed_schema_version))) { + LOG_WARN("failed to get tenant schema version", K(ret), K(tenant_id)); + } else if 
(OB_FAIL(schema_guard.get_table_schema(tenant_id, arg.database_name_, arg.table_name_, is_index, table_schema))) { + LOG_WARN("failed to get data table schema", K(ret), K(arg)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_USER_ERROR(OB_TABLE_NOT_EXIST, to_cstring(arg.database_name_), to_cstring(arg.table_name_)); + LOG_WARN("table not found", K(arg), K(ret)); + } else if (table_schema->is_in_recyclebin()) { + ret = OB_ERR_OPERATION_ON_RECYCLE_OBJECT; + LOG_WARN("can not truncate index of table in recyclebin.", K(ret), K(arg)); + } else if (OB_FAIL(schema_guard.check_database_in_recyclebin(tenant_id, + table_schema->get_database_id(), + is_db_in_recyclebin))) { + LOG_WARN("check database in recyclebin failed", K(ret), K(tenant_id)); + } else if (is_db_in_recyclebin) { + ret = OB_ERR_OPERATION_ON_RECYCLE_OBJECT; + LOG_WARN("Can not truncate index of db in recyclebin", K(ret), K(arg)); + } else { + const uint64_t table_id = table_schema->get_table_id(); + const ObTableSchema *index_table_schema = NULL; + ObIndexBuilder index_builder(*this); + uint64_t tenant_data_version = 0; + if (OB_FAIL(schema_guard.get_table_schema(tenant_id, arg.index_table_id_, index_table_schema))) { + LOG_WARN("fail to get table schema", K(ret), K(tenant_id), K(index_table_schema)); + } else if (OB_ISNULL(index_table_schema)) { + ret = OB_ERR_CANT_DROP_FIELD_OR_KEY; + LOG_WARN("index table schema should not be null", K(ret), K(arg.index_name_)); + LOG_USER_ERROR(OB_ERR_CANT_DROP_FIELD_OR_KEY, arg.index_name_.length(), arg.index_name_.ptr()); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get min data version failed", K(ret), K(tenant_id)); + } else if (tenant_data_version < DATA_VERSION_4_3_3_0) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "rebuild vec index before 4.3.3 is"); + } else { + ObDDLTaskRecord task_record; + ObDDLSQLTransaction trans(schema_service_); + SMART_VAR(ObRebuildIndexArg, 
rebuild_index_arg) { + if (OB_FAIL(rebuild_index_arg.assign(arg))) { + LOG_WARN("fail to assign rebuild index arg", K(ret)); + } else if (OB_FAIL(ObVectorIndexUtil::generate_new_index_name(allocator, rebuild_index_arg.index_name_))) { + LOG_WARN("fail to generate new index name", K(ret)); + } else if (OB_FAIL(trans.start(&GCTX.root_service_->get_sql_proxy(), tenant_id, refreshed_schema_version))) { + LOG_WARN("fail to start trans", K(ret)); + } else if (OB_FAIL(index_builder.submit_rebuild_index_task(trans, + rebuild_index_arg, + table_schema, + nullptr/*inc_data_tablet_ids*/, + nullptr/*del_data_tablet_ids*/, + index_table_schema, + rebuild_index_arg.parallelism_, + rebuild_index_arg.consumer_group_id_, + tenant_data_version, + allocator, + task_record))) { + LOG_WARN("fail to submit rebuild vec index task", K(ret), K(index_table_schema)); + } else { + res.task_id_ = task_record.task_id_; + LOG_INFO("succ submit rebuild task", K(res.task_id_)); + } + if (trans.is_started()) { + int temp_ret = OB_SUCCESS; + if (OB_SUCCESS != (temp_ret = trans.end(OB_SUCC(ret)))) { + LOG_WARN("trans end failed", "is_commit", OB_SUCCESS == ret, K(ret), K(temp_ret)); + ret = OB_SUCC(ret) ? 
temp_ret : ret; + } + } + if (OB_SUCC(ret)) { + int tmp_ret = OB_SUCCESS; + if (OB_FAIL(publish_schema(tenant_id))) { + LOG_WARN("fail to publish schema", K(ret), K(tenant_id)); + } else if (OB_TMP_FAIL(GCTX.root_service_->get_ddl_task_scheduler().schedule_ddl_task(task_record))) { + LOG_WARN("fail to schedule ddl task", K(tmp_ret), K(task_record)); + } + } + } + } + } + + ObSqlString err_table_list; + if (OB_TABLE_NOT_EXIST == ret || OB_ERR_BAD_DATABASE == ret) { + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = log_rebuild_warn_or_err_msg(arg, err_table_list))) { + ret = tmp_ret; + LOG_WARN("log_drop_warn_or_err_msg failed", KR(ret)); + } else { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("failed to drop index table", K(ret), K(err_table_list)); + } + } + } + LOG_DEBUG("finish rebuild vec index", K(ret), K(arg)); + return ret; +} + int ObDDLService::rebuild_index(const ObRebuildIndexArg &arg, obrpc::ObAlterTableRes &res) { int ret = OB_SUCCESS; @@ -25036,7 +25564,8 @@ int ObDDLService::update_index_status(const obrpc::ObUpdateIndexStatusArg &arg) arg.task_id_))) { LOG_WARN("failed to get owner id", K(ret), K(arg.task_id_)); } else if (OB_FAIL(ObDDLLock::unlock_for_add_drop_index(*data_table_schema, - *table, + table->get_table_id(), + table->is_global_index_table(), owner_id, trans))) { LOG_WARN("failed to unlock ddl lock", K(ret)); @@ -30283,7 +30812,6 @@ int ObDDLService::notify_refresh_schema(const ObAddrIArray &addrs) is_async = (0 == tenant_config->_publish_schema_mode.case_compare(PUBLISH_SCHEMA_MODE_ASYNC)); } } - LOG_INFO("try to notify refresh schema", K(is_async), K(schema_version), K(local_schema_info), K(schema_info)); const int64_t rpc_timeout = GCONF.rpc_timeout; int64_t timeout = 0; @@ -30361,7 +30889,6 @@ int ObDDLService::publish_schema(uint64_t tenant_id /*=OB_INVALID_TENANT_ID*/) } else if (OB_FAIL(publish_schema(tenant_id, addrs))) { LOG_WARN("fail to pubish schema", K(ret), K(tenant_id)); } - return OB_SUCCESS; } diff --git 
a/src/rootserver/ob_ddl_service.h b/src/rootserver/ob_ddl_service.h index 9f0cadb025..8a2a1fb3ef 100644 --- a/src/rootserver/ob_ddl_service.h +++ b/src/rootserver/ob_ddl_service.h @@ -156,6 +156,9 @@ public: ObSchemaGetterGuard &schema_guard, const share::schema::ObTableSchema &mlog_schema); + int rebuild_vec_index(const obrpc::ObRebuildIndexArg &arg, + obrpc::ObAlterTableRes &res); + int rebuild_index(const obrpc::ObRebuildIndexArg &arg, obrpc::ObAlterTableRes &res); @@ -549,6 +552,14 @@ public: const ObTableSchema *orig_table_schema, ObSchemaGetterGuard &schema_guard); + /** + * This function is called by the DDL REBUILD INDEX TASK. + * This task will switch old vector index and new vector index name + * Also will change old vector index status to INDEX_STATUS_UNAVAILABLE + * All these index status and name will change in the same trans + */ + int switch_index_name_and_status_for_vec_index_table(obrpc::ObAlterTableArg &alter_table_arg); + /** * This function is called by the storage layer in the three stage of offline ddl. 
* all the following steps are completed in the same trans: @@ -1306,7 +1317,7 @@ private: ObSchemaGetterGuard &schema_guard, const uint64_t tenant_id, const uint64_t data_table_id, - bool &fts_exist); + bool &domain_index_exist); int check_has_index_operation( ObSchemaGetterGuard &schema_guard, const uint64_t teannt_id, @@ -2127,11 +2138,28 @@ private: const int64_t new_data_table_schema_version, const ObIArray> &aux_schema_versions, ObDDLSQLTransaction &trans); + int get_dropping_vec_index_invisiable_table_schema_( + const uint64_t tenant_id, + const uint64_t data_table_id, + const uint64_t index_table_id, + const bool is_vec_inner_drop, + const ObString &index_name, + share::schema::ObSchemaGetterGuard &schema_guard, + ObDDLOperator &ddl_operator, + common::ObMySQLTransaction &trans, + common::ObIArray &new_aux_schemas); public: - int generate_aux_index_schema( - const obrpc::ObGenerateAuxIndexSchemaArg &arg, - obrpc::ObGenerateAuxIndexSchemaRes &result); + int create_aux_index( + const obrpc::ObCreateAuxIndexArg &arg, + obrpc::ObCreateAuxIndexRes &result); + int check_aux_index_schema_exist_( + const uint64_t tenant_id, + const obrpc::ObCreateIndexArg &arg, + ObSchemaGetterGuard &schema_guard, + const ObTableSchema *data_schema, + bool &is_exist, + const ObTableSchema *&index_schema); int check_parallel_ddl_conflict( share::schema::ObSchemaGetterGuard &schema_guard, const obrpc::ObDDLArg &arg); @@ -2191,12 +2219,24 @@ public: common::ObIAllocator *allocator = NULL); #endif private: - int check_schema_generated_for_aux_index_schema_( - const obrpc::ObGenerateAuxIndexSchemaArg &arg, + int generate_aux_index_schema_( + const uint64_t tenant_id, ObSchemaGetterGuard &schema_guard, + obrpc::ObCreateIndexArg &create_index_arg, + ObTableSchema &nonconst_data_schema, const ObTableSchema *data_schema, - bool &schema_generated, - uint64_t &index_table_id); + ObIArray &gen_columns, + ObDDLSQLTransaction &trans, + const uint64_t tenant_data_version, + ObTableSchema 
&index_schema); + int create_aux_index_task_( + const ObTableSchema *data_schema, + const ObTableSchema *idx_schema, + obrpc::ObCreateIndexArg &create_index_arg, + ObArenaAllocator &allocator, + const int64_t parent_task_id, + const uint64_t tenant_data_version, + ObDDLTaskRecord &task_record); int adjust_cg_for_offline(ObTableSchema &new_table_schema); int add_column_group(const obrpc::ObAlterTableArg &alter_table_arg, const share::schema::ObTableSchema &ori_table_schema, diff --git a/src/rootserver/ob_index_builder.cpp b/src/rootserver/ob_index_builder.cpp index e96ac96c9d..aaee3fbc67 100644 --- a/src/rootserver/ob_index_builder.cpp +++ b/src/rootserver/ob_index_builder.cpp @@ -31,6 +31,7 @@ #include "share/config/ob_server_config.h" #include "share/ob_index_builder_util.h" #include "share/ob_fts_index_builder_util.h" +#include "share/ob_vec_index_builder_util.h" #include "observer/ob_server_struct.h" #include "sql/resolver/ddl/ob_ddl_resolver.h" #include "ob_zone_manager.h" @@ -48,6 +49,7 @@ #include "rootserver/ddl_task/ob_ddl_scheduler.h" #include "rootserver/ddl_task/ob_ddl_task.h" #include "share/scn.h" +#include "share/vector_index/ob_vector_index_util.h" namespace oceanbase { @@ -210,6 +212,10 @@ int ObIndexBuilder::drop_index(const ObDropIndexArg &arg, obrpc::ObDropIndexRes ret = OB_NOT_SUPPORTED; LOG_WARN("not support to drop a building index", K(ret), K(arg.is_inner_), KPC(index_table_schema)); LOG_USER_ERROR(OB_NOT_SUPPORTED, "dropping a building index is"); + } else if (index_table_schema->is_vec_index() && compat_version < DATA_VERSION_4_3_3_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("drop vector index before version 4.3.3 is not supported", KR(ret), K(compat_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "drop vector index before version 4.3.3 is"); } else if (arg.is_add_to_scheduler_) { ObDDLOperator ddl_operator(ddl_service_.get_schema_service(), ddl_service_.get_sql_proxy()); ObDDLSQLTransaction trans(&ddl_service_.get_schema_service()); @@ -217,25 
+223,31 @@ int ObIndexBuilder::drop_index(const ObDropIndexArg &arg, obrpc::ObDropIndexRes ObArenaAllocator allocator(lib::ObLabel("DdlTaskTmp")); ObDDLTaskRecord task_record; bool has_other_domain_index = false; + const bool is_vec_or_fts_or_multivalue_index = index_table_schema->is_fts_or_multivalue_index() || index_table_schema->is_vec_index(); const bool is_inner_and_fts_index = arg.is_inner_ && index_table_schema->is_fts_index(); const bool is_inner_and_multivalue_index = arg.is_inner_ && index_table_schema->is_multivalue_index(); - const bool is_inner_and_fts_or_mulvalue_index = is_inner_and_fts_index || is_inner_and_multivalue_index; + const bool is_inner_and_vec_index = arg.is_inner_ && !arg.is_vec_inner_drop_ && index_table_schema->is_vec_index(); + const bool is_inner_and_fts_or_mulvalue_or_vector_index = is_inner_and_fts_index || is_inner_and_multivalue_index || is_inner_and_vec_index; bool has_index_task = false; typedef common::ObSEArray TableSchemaArray; SMART_VAR(TableSchemaArray, new_index_schemas) { if (OB_FAIL(schema_guard.get_schema_version(tenant_id, refreshed_schema_version))) { LOG_WARN("failed to get tenant schema version", KR(ret), K(tenant_id)); - } else if ((index_table_schema->is_doc_id_rowkey() || index_table_schema->is_rowkey_doc_id()) - && OB_FAIL(check_has_fts_or_multivalue_index(tenant_id, index_table_schema->get_data_table_id(), schema_guard, + } else if ((index_table_schema->is_doc_id_rowkey() || + index_table_schema->is_rowkey_doc_id() || + index_table_schema->is_vec_rowkey_vid_type() || + index_table_schema->is_vec_vid_rowkey_type()) + && OB_FAIL(check_has_none_shared_index_tables_for_fts_or_multivalue_or_vector_index_(tenant_id, index_table_schema->get_data_table_id(), schema_guard, has_other_domain_index))) { - LOG_WARN("fail to check has domain index", K(ret), K(tenant_id), K(index_table_schema->get_index_type()), K(arg), KPC(index_table_schema)); + LOG_WARN("fail to check has fts/multivalue/vector index", K(ret), 
K(tenant_id), K(index_table_schema->get_index_type()), K(arg), KPC(index_table_schema)); } else if (has_other_domain_index) { - LOG_INFO("there are some other fulltext or multivalue index, and don't need to drop rowkey doc or doc rowkey", + LOG_INFO("there are some other fts/multivalue/vector index, and don't need to drop share index table", K(index_table_schema->get_index_type()), KPC(index_table_schema)); } else if (OB_FAIL(trans.start(&ddl_service_.get_sql_proxy(), tenant_id, refreshed_schema_version))) { LOG_WARN("start transaction failed", KR(ret), K(tenant_id), K(refreshed_schema_version)); - } else if (!arg.is_inner_ && !index_table_schema->can_read_index() && OB_FAIL(ObDDLTaskRecordOperator::check_has_index_or_mlog_task( - trans, tenant_id, data_table_id, index_table_schema->get_table_id(), has_index_task))) { + } else if (!arg.is_inner_ && + (!index_table_schema->can_read_index() || index_table_schema->is_vec_index()) && + OB_FAIL(ObDDLTaskRecordOperator::check_has_index_or_mlog_task(trans, *index_table_schema, tenant_id, data_table_id, has_index_task))) { LOG_WARN("failed to check ddl conflict", K(ret)); } else if (has_index_task) { ret = OB_NOT_SUPPORTED; @@ -244,7 +256,7 @@ int ObIndexBuilder::drop_index(const ObDropIndexArg &arg, obrpc::ObDropIndexRes } else if (need_rename_index && OB_FAIL(ddl_service_.rename_dropping_index_name( table_schema->get_table_id(), table_schema->get_database_id(), - is_inner_and_fts_or_mulvalue_index, + is_inner_and_fts_or_mulvalue_or_vector_index, arg, schema_guard, ddl_operator, @@ -253,18 +265,21 @@ int ObIndexBuilder::drop_index(const ObDropIndexArg &arg, obrpc::ObDropIndexRes LOG_WARN("rename index name failed", K(ret)); } else if (!need_rename_index && OB_FAIL(new_index_schemas.push_back(*index_table_schema))) { LOG_WARN("failed to assign index table schema to new index schema", KR(ret)); - } else if (is_inner_and_fts_or_mulvalue_index && 0 == new_index_schemas.count()) { + } else if 
(is_inner_and_fts_or_mulvalue_or_vector_index && 0 == new_index_schemas.count()) { if (OB_FAIL(new_index_schemas.push_back(*index_table_schema))) { LOG_WARN("fail to push back index schema", K(ret), KPC(index_table_schema)); } - } else if (OB_UNLIKELY(!index_table_schema->is_fts_or_multivalue_index() && new_index_schemas.count() != 1) - || OB_UNLIKELY(is_inner_and_fts_or_mulvalue_index && new_index_schemas.count() != 1) + } else if (OB_UNLIKELY(!is_vec_or_fts_or_multivalue_index && new_index_schemas.count() != 1) + || OB_UNLIKELY(is_inner_and_fts_or_mulvalue_or_vector_index && new_index_schemas.count() != 1) + || OB_UNLIKELY(!arg.is_inner_ && index_table_schema->is_vec_delta_buffer_type() && new_index_schemas.count() != 5) // five index assistant table of vec index || OB_UNLIKELY(!arg.is_inner_ && index_table_schema->is_fts_index_aux() && new_index_schemas.count() != 4) || OB_UNLIKELY(!arg.is_inner_ && index_table_schema->is_multivalue_index_aux() && new_index_schemas.count() != 3)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error, invalid new index schema count", K(ret), + "is vec or fts or multivalue index", is_vec_or_fts_or_multivalue_index, "is inner", arg.is_inner_, "count", new_index_schemas.count(), + "is vec index", index_table_schema->is_vec_delta_buffer_type(), "is fts index", index_table_schema->is_fts_index_aux(), "is multivalue index", index_table_schema->is_multivalue_index_aux(), K(new_index_schemas)); @@ -462,6 +477,10 @@ int ObIndexBuilder::submit_build_index_task( if (is_create_fts_index) { param.type_ = ObDDLType::DDL_CREATE_FTS_INDEX; } + bool is_create_vec_index = share::schema::is_vec_index(create_index_arg.index_type_); + if (is_create_vec_index) { + param.type_ = ObDDLType::DDL_CREATE_VEC_INDEX; + } if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, trans, task_record))) { LOG_WARN("submit create index ddl task failed", K(ret)); } else if 
(OB_FAIL(owner_id.convert_from_value(ObLockOwnerType::DEFAULT_OWNER_TYPE, @@ -475,7 +494,124 @@ int ObIndexBuilder::submit_build_index_task( return ret; } -int ObIndexBuilder::recognize_index_schemas( +int ObIndexBuilder::recognize_vec_index_schemas( + const common::ObIArray &index_schemas, + const bool is_vec_inner_drop, + int64_t &index_ith, + int64_t &rowkey_vid_ith, + int64_t &vid_rowkey_ith, + int64_t &index_id_ith, + int64_t &snapshot_data_ith) +{ + int ret = OB_SUCCESS; + index_ith = -1; + rowkey_vid_ith = -1; + vid_rowkey_ith = -1; + index_id_ith = -1; + snapshot_data_ith = -1; + const int64_t VEC_DOMAIN_INDEX_TABLE_COUNT = 1; // delta_buffer_table + const int64_t VEC_INDEX_TABLE_COUNT = 5; + if (OB_UNLIKELY(VEC_DOMAIN_INDEX_TABLE_COUNT != index_schemas.count() && + !is_vec_inner_drop && (VEC_INDEX_TABLE_COUNT != index_schemas.count()))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(index_schemas)); + } else if (index_schemas.count() == 1) { + index_ith = 0; + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < index_schemas.count(); ++i) { + if (index_schemas.at(i).is_vec_rowkey_vid_type()) { + if (OB_UNLIKELY(-1 != rowkey_vid_ith)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, there are multiple vid rowkey tables", K(ret), K(index_schemas)); + } else { + rowkey_vid_ith = i; + } + } else if (index_schemas.at(i).is_vec_vid_rowkey_type()) { + if (OB_UNLIKELY(-1 != vid_rowkey_ith)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, there are multiple rowkey vid tables", K(ret), K(index_schemas)); + } else { + vid_rowkey_ith = i; + } + } else if (index_schemas.at(i).is_vec_index_id_type()) { + if (OB_UNLIKELY(-1 != index_id_ith)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, there are multiple index id tables", K(ret), K(index_schemas)); + } else { + index_id_ith = i; + } + } else if (index_schemas.at(i).is_vec_index_snapshot_data_type()) { + if (OB_UNLIKELY(-1 != snapshot_data_ith)) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, there are multiple snapshot data tables", K(ret), K(index_schemas)); + } else { + snapshot_data_ith = i; + } + } else if (OB_UNLIKELY(-1 != index_ith)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, there are multiple user index tables", K(ret), K(index_schemas)); + } else { + index_ith = i; + } + } + } + return ret; +} + +int ObIndexBuilder::submit_rebuild_index_task( + ObMySQLTransaction &trans, + const obrpc::ObRebuildIndexArg &rebuild_index_arg, + const ObTableSchema *data_schema, + const ObIArray *inc_data_tablet_ids, + const ObIArray *del_data_tablet_ids, + const ObTableSchema *index_schema, + const int64_t parallelism, + const int64_t group_id, + const uint64_t tenant_data_version, + common::ObIAllocator &allocator, + ObDDLTaskRecord &task_record) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(index_schema) || OB_ISNULL(GCTX.root_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), KP(index_schema), K(GCTX.root_service_)); + } else { + ObTableLockOwnerID owner_id; + const int64_t old_index_table_id = OB_INVALID_ID; + const int64_t new_index_table_id = OB_INVALID_ID; + const bool is_global_vector_index = false; + ObCreateDDLTaskParam param(index_schema->get_tenant_id(), + ObDDLType::DDL_REBUILD_INDEX, + index_schema, + nullptr, + 0/*object_id*/, + index_schema->get_schema_version(), + parallelism, + group_id, + &allocator, + &rebuild_index_arg); + param.tenant_data_version_ = tenant_data_version; + if (OB_UNLIKELY(nullptr == data_schema || nullptr == index_schema || tenant_data_version <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("schema is invalid", K(ret), KP(data_schema), KP(index_schema), K(tenant_data_version)); + } else if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, trans, task_record))) { + LOG_WARN("submit create index ddl task failed", K(ret)); + } else if (OB_FAIL(owner_id.convert_from_value(ObLockOwnerType::DEFAULT_OWNER_TYPE, + 
task_record.task_id_))) { + LOG_WARN("failed to get owner id", K(ret), K(task_record.task_id_)); + } else if (OB_FAIL(ObDDLLock::lock_for_rebuild_index(*data_schema, + old_index_table_id, + new_index_table_id, + is_global_vector_index, + owner_id, + trans))) { + LOG_WARN("failed to lock rebuild index ddl", K(ret)); + } + } + return ret; +} + +int ObIndexBuilder::recognize_fts_index_schemas( const common::ObIArray &index_schemas, int64_t &index_ith, int64_t &aux_doc_word_ith, @@ -535,17 +671,33 @@ int ObIndexBuilder::submit_drop_index_task(ObMySQLTransaction &trans, ObDDLTaskRecord &task_record) { int ret = OB_SUCCESS; - int64_t index_ith = -1; + int64_t index_ith = 0; int64_t aux_doc_word_ith = -1; int64_t aux_rowkey_doc_ith = -1; int64_t aux_doc_rowkey_ith = -1; int64_t aux_multivalue_ith = -1; - if (OB_UNLIKELY(index_schemas.count() != 1 && index_schemas.count() != 4 && index_schemas.count() != 3)) { + int64_t vec_rowkey_vid_ith = -1; + int64_t vec_vid_rowkey_ith = -1; + int64_t vec_index_id_ith = -1; + int64_t vec_snapshot_data_ith = -1; + + const int64_t NORMAL_INDEX_COUNT = 1; + const int64_t FTS_INDEX_COUNT = 4; + const int64_t FTS_OR_MULTIVALUE_INDEX_COUNT = 3; + const int64_t VEC_INDEX_COUNT = 5; + + if (OB_UNLIKELY(index_schemas.count() != NORMAL_INDEX_COUNT && + index_schemas.count() != FTS_INDEX_COUNT && + index_schemas.count() != FTS_OR_MULTIVALUE_INDEX_COUNT && + index_schemas.count() != VEC_INDEX_COUNT)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid index schema count", K(ret), K(index_schemas)); - } else if (OB_FAIL(recognize_index_schemas(index_schemas, index_ith, aux_doc_word_ith, + } else if (index_schemas.at(0).is_fts_index() && OB_FAIL(recognize_fts_index_schemas(index_schemas, index_ith, aux_doc_word_ith, aux_rowkey_doc_ith, aux_doc_rowkey_ith))) { LOG_WARN("fail to recognize index and aux table from schema array", K(ret)); + } else if (index_schemas.at(0).is_vec_index() && OB_FAIL(recognize_vec_index_schemas(index_schemas, 
arg.is_vec_inner_drop_, index_ith, vec_rowkey_vid_ith, + vec_vid_rowkey_ith, vec_index_id_ith, vec_snapshot_data_ith))) { + LOG_WARN("fail to recognize index and aux table from schema array", K(ret)); } else if (OB_ISNULL(GCTX.root_service_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error, root service is nullptr", K(ret), KP(GCTX.root_service_)); @@ -554,12 +706,22 @@ int ObIndexBuilder::submit_drop_index_task(ObMySQLTransaction &trans, LOG_WARN("unexpected error, invalid array index", K(ret), K(index_ith)); } else { const ObTableSchema &index_schema = index_schemas.at(index_ith); + const bool is_drop_vec_task = (!arg.is_inner_ || arg.is_vec_inner_drop_) && index_schema.is_vec_delta_buffer_type(); // delta_buffer_table const bool is_drop_fts_task = !arg.is_inner_ && index_schema.is_fts_index_aux(); const bool is_drop_multivalue_task = !arg.is_inner_ && index_schema.is_multivalue_index_aux(); const bool is_drop_fts_or_multivalue_task = is_drop_fts_task || is_drop_multivalue_task; + if (OB_UNLIKELY(!index_schema.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(index_schema)); + } else if (OB_UNLIKELY(is_drop_vec_task && !arg.is_vec_inner_drop_ // if is inner_drop, because drop count no necessary equal to five, so ith maybe equal to -1 + && (vec_rowkey_vid_ith < 0 || vec_rowkey_vid_ith >= index_schemas.count() + || vec_vid_rowkey_ith < 0 || vec_vid_rowkey_ith >= index_schemas.count() + || vec_index_id_ith < 0 || vec_index_id_ith >= index_schemas.count() + || vec_snapshot_data_ith < 0 || vec_snapshot_data_ith >= index_schemas.count()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, invalid aux table id for fts index", K(ret), K(is_drop_vec_task), + K(vec_rowkey_vid_ith), K(vec_vid_rowkey_ith), K(vec_index_id_ith), K(vec_snapshot_data_ith), K(index_schemas.count())); } else if (OB_UNLIKELY(is_drop_fts_task && (aux_rowkey_doc_ith < 0 || aux_rowkey_doc_ith >= index_schemas.count() || aux_doc_rowkey_ith < 0 || 
aux_doc_rowkey_ith >= index_schemas.count() || aux_doc_word_ith < 0 || aux_doc_word_ith >= index_schemas.count()))) { @@ -571,8 +733,8 @@ int ObIndexBuilder::submit_drop_index_task(ObMySQLTransaction &trans, ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error, invalid aux table id for multivalue index", K(ret), K(is_drop_multivalue_task), K(aux_rowkey_doc_ith), K(aux_doc_rowkey_ith), K(index_schemas.count())); - } else if (!is_drop_fts_or_multivalue_task) { - // this isn't drop fts task. + } else if (!is_drop_fts_or_multivalue_task && !is_drop_vec_task) { + // this isn't drop fts and isn't vec index task. const int64_t parent_task_id = arg.task_id_; ObTableLockOwnerID owner_id; const ObDDLType ddl_type = (ObIndexArg::DROP_MLOG == arg.index_action_type_) ? @@ -602,7 +764,7 @@ int ObIndexBuilder::submit_drop_index_task(ObMySQLTransaction &trans, nullptr/*del_data_tablet_ids*/, index_schema, owner_id, trans))) { LOG_WARN("failed to lock online ddl lock", K(ret)); } - } else { // create dropping fts index parent task. + } else if (is_drop_fts_or_multivalue_task) { // create dropping fts index parent task. ObDDLType ddl_type = is_drop_fts_task ? ObDDLType::DDL_DROP_FTS_INDEX : ObDDLType::DDL_DROP_MULVALUE_INDEX; ObCreateDDLTaskParam param(index_schema.get_tenant_id(), ddl_type, @@ -623,6 +785,32 @@ int ObIndexBuilder::submit_drop_index_task(ObMySQLTransaction &trans, if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, trans, task_record))) { LOG_WARN("fail to create drop fts index task", K(ret), K(param)); } + } else if (is_drop_vec_task) { + ObCreateDDLTaskParam param(index_schema.get_tenant_id(), + ObDDLType::DDL_DROP_VEC_INDEX, + &index_schema, + nullptr/*dest_table_schema*/, + 0/*object_id*/, + index_schema.get_schema_version(), + 0/*parallelism*/, + arg.consumer_group_id_, + &allocator, + &arg); + param.vec_vid_rowkey_schema_ = vec_vid_rowkey_ith == -1 ? 
nullptr : &(index_schemas.at(vec_vid_rowkey_ith)); + param.vec_rowkey_vid_schema_ = vec_rowkey_vid_ith == -1 ? nullptr : &(index_schemas.at(vec_rowkey_vid_ith)); + param.vec_index_id_schema_ = vec_index_id_ith == -1 ? nullptr : &(index_schemas.at(vec_index_id_ith)); + param.vec_snapshot_data_schema_ = vec_snapshot_data_ith == -1 ? nullptr : &(index_schemas.at(vec_snapshot_data_ith)); + if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, trans, task_record))) { + if (OB_HASH_EXIST == ret) { + task_has_exist = true; + ret = OB_SUCCESS; + } else { + LOG_WARN("submit drop vec index ddl task failed", K(ret), K(param)); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected drop index task", K(ret), K(is_drop_fts_or_multivalue_task), K(is_drop_vec_task)); } } return ret; @@ -638,9 +826,10 @@ int ObIndexBuilder::do_create_local_index( ObSEArray gen_columns; ObDDLTaskRecord task_record; ObArenaAllocator allocator(lib::ObLabel("DdlTaskTmp")); - HEAP_VARS_3((ObTableSchema, index_schema), + HEAP_VARS_4((ObTableSchema, index_schema), (ObTableSchema, new_table_schema), - (obrpc::ObCreateIndexArg, my_arg)) { + (obrpc::ObCreateIndexArg, my_arg), + (obrpc::ObCreateIndexArg, tmp_arg)) { ObDDLSQLTransaction trans(&ddl_service_.get_schema_service()); int64_t refreshed_schema_version = 0; const uint64_t tenant_id = table_schema.get_tenant_id(); @@ -680,10 +869,55 @@ int ObIndexBuilder::do_create_local_index( my_arg.index_schema_.set_index_type(INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE); } } + bool rowkey_vid_exist = false; if (OB_FAIL(ret)) { + } else if (share::schema::is_vec_index(my_arg.index_type_)) { + const ObTableSchema *rowkey_vid_schema = nullptr; + if (OB_FAIL(tmp_arg.assign(my_arg))) { + LOG_WARN("fail to assign arg", K(ret)); + } else if (!tmp_arg.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to copy create index arg", K(ret)); + } else if (!create_index_arg.is_rebuild_index_ && + FALSE_IT(tmp_arg.index_type_ = 
INDEX_TYPE_VEC_ROWKEY_VID_LOCAL)) { + } else if (OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator, tmp_arg.index_type_, tmp_arg.index_name_, tmp_arg.index_name_))) { + LOG_WARN("failed to adjust vec index name", K(ret)); + } else if (OB_FAIL(ddl_service_.check_aux_index_schema_exist_(tenant_id, + tmp_arg, + schema_guard, + &new_table_schema, + rowkey_vid_exist, + rowkey_vid_schema))) { + LOG_WARN("fail to check rowkey vid schema existence", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (share::schema::is_vec_index(my_arg.index_type_) && + !create_index_arg.is_rebuild_index_ && + !rowkey_vid_exist && + FALSE_IT(my_arg.index_type_ = INDEX_TYPE_VEC_ROWKEY_VID_LOCAL)) { + // 1. generate rowkey vid schema if not exist + // 2. otherwise generate vec index aux schema + } else if (create_index_arg.is_rebuild_index_) { + if (share::schema::is_vec_index(my_arg.index_type_)) { + if (OB_FAIL(ObVectorIndexUtil::generate_index_schema_from_exist_table(tenant_id, + schema_guard, + ddl_service_, + create_index_arg, + table_schema, + index_schema))) { + LOG_WARN("fail to generate index schema from exist table", K(ret), K(tenant_id), K(create_index_arg)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected index type to generate index schema from exist table", K(ret), K(my_arg.index_type_)); + } } else if (share::schema::is_fts_index(my_arg.index_type_) && OB_FAIL(ObFtsIndexBuilderUtil::generate_fts_aux_index_name(my_arg, &allocator))) { LOG_WARN("failed to adjust fts index name", K(ret)); + } else if (share::schema::is_vec_index(my_arg.index_type_) && + OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator, my_arg.index_type_, my_arg.index_name_, my_arg.index_name_))) { + LOG_WARN("failed to adjust vec index name", K(ret)); } else if (OB_FAIL(ObIndexBuilderUtil::adjust_expr_index_args( my_arg, new_table_schema, allocator, gen_columns))) { LOG_WARN("fail to adjust expr index args", K(ret)); @@ -691,6 +925,8 @@ int 
ObIndexBuilder::do_create_local_index( my_arg, new_table_schema, global_index_without_column_info, true/*generate_id*/, index_schema))) { LOG_WARN("fail to generate schema", K(ret), K(my_arg)); + } + if (OB_FAIL(ret)) { } else if (OB_FAIL(new_table_schema.check_create_index_on_hidden_primary_key(index_schema))) { LOG_WARN("failed to check create index on table", K(ret), K(index_schema)); } else if (gen_columns.empty()) { @@ -801,7 +1037,8 @@ int ObIndexBuilder::do_create_index( || INDEX_TYPE_DOMAIN_CTXCAT_DEPRECATED == arg.index_type_ || INDEX_TYPE_SPATIAL_LOCAL == arg.index_type_ || is_fts_index(arg.index_type_) - || is_multivalue_index(arg.index_type_)) { + || is_multivalue_index(arg.index_type_) + || is_vec_index(arg.index_type_)) { if (OB_FAIL(do_create_local_index(schema_guard, arg, *table_schema, res))) { LOG_WARN("fail to do create local index", K(ret), K(arg)); } @@ -910,8 +1147,17 @@ int ObIndexBuilder::generate_schema( LOG_WARN("tenant data version is less than 4.3.1, multivalue index is not supported", K(ret), K(tenant_data_version)); LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.1, multivalue index"); } + } else if (share::schema::is_vec_index(arg.index_type_)) { + uint64_t tenant_data_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(data_schema.get_tenant_id(), + tenant_data_version))) { + LOG_WARN("failed to get tenant data version", K(ret)); + } else if (tenant_data_version < DATA_VERSION_4_3_3_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.3, vector index is not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.3, vector index"); + } } - if (OB_SUCC(ret) && (INDEX_TYPE_NORMAL_LOCAL == arg.index_type_ || INDEX_TYPE_UNIQUE_LOCAL == arg.index_type_ @@ -1038,6 +1284,10 @@ int ObIndexBuilder::generate_schema( } else if (OB_FAIL(set_index_table_options(arg, data_schema, schema))) { LOG_WARN("set_index_table_options failed", K(arg), 
K(data_schema), K(ret)); } else { + if (!share::schema::is_built_in_vec_index(arg.index_type_)) { + // only delta_buffer_table set vector_index_param + schema.set_index_params(arg.index_schema_.get_index_params()); + } schema.set_name_generated_type(arg.index_schema_.get_name_generated_type()); LOG_INFO("finish generate index schema", K(schema)); } @@ -1190,7 +1440,8 @@ int ObIndexBuilder::set_basic_infos(const ObCreateIndexArg &arg, || INDEX_TYPE_UNIQUE_LOCAL == arg.index_type_ || INDEX_TYPE_SPATIAL_LOCAL == arg.index_type_ || is_fts_index(arg.index_type_) - || is_multivalue_index(arg.index_type_)) { + || is_multivalue_index(arg.index_type_) + || is_vec_index(arg.index_type_)) { schema.set_part_level(data_schema.get_part_level()); } else {} // partition level is filled during resolve stage for global index schema.set_charset_type(data_schema.get_charset_type()); @@ -1288,15 +1539,15 @@ bool ObIndexBuilder::is_final_index_status(const ObIndexStatus index_status) con || is_error_index_status(index_status)); } -int ObIndexBuilder::check_has_fts_or_multivalue_index( +int ObIndexBuilder::check_has_none_shared_index_tables_for_fts_or_multivalue_or_vector_index_( const uint64_t tenant_id, const uint64_t data_table_id, share::schema::ObSchemaGetterGuard &schema_guard, - bool &has_fts_or_multivalue_index) + bool &has_fts_or_multivalue_or_vector_index) { int ret = OB_SUCCESS; ObSEArray indexs; - has_fts_or_multivalue_index = false; + has_fts_or_multivalue_or_vector_index = false; if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id || OB_INVALID_ID == data_table_id)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid tenant id or data table id", K(ret), K(tenant_id), K(data_table_id)); @@ -1304,17 +1555,21 @@ int ObIndexBuilder::check_has_fts_or_multivalue_index( LOG_WARN("fail to get index schema with data table id", K(ret), K(tenant_id), K(data_table_id)); } else { bool has_other_fts_index = false; - for (int64_t i = 0; OB_SUCC(ret) && !has_fts_or_multivalue_index && i < 
indexs.count(); ++i) { + for (int64_t i = 0; OB_SUCC(ret) && !has_fts_or_multivalue_or_vector_index && i < indexs.count(); ++i) { const ObSimpleTableSchemaV2 *index_schema = indexs.at(i); if (OB_ISNULL(index_schema)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error, index schema is nullptr", K(ret), KP(index_schema), K(i), K(indexs)); - } else if (!index_schema->is_fts_index() && !index_schema->is_multivalue_index()) { + } else if (!index_schema->is_fts_index() && !index_schema->is_multivalue_index() && !index_schema->is_vec_index()) { continue; // The index isn't fulltext index / multivalue index, just skip. } else if (index_schema->is_fts_index_aux() || index_schema->is_fts_doc_word_aux() || - index_schema->is_multivalue_index_aux()) { // The index is fulltext index - has_fts_or_multivalue_index = true; + index_schema->is_multivalue_index_aux() || + index_schema->is_vec_index_id_type() || + index_schema->is_vec_delta_buffer_type() || + index_schema->is_vec_index_snapshot_data_type()) { + // none-shared-index tables still exist. shared-index table for FTS/MULTI-VALUE/VEC index should not be deleted + has_fts_or_multivalue_or_vector_index = true; } } } @@ -1327,7 +1582,7 @@ bool ObIndexBuilder::ignore_error_code_for_domain_index( const share::schema::ObTableSchema *index_schema/*= nullptr*/) { const bool is_domain_index = nullptr == index_schema ? 
- true : (index_schema->is_fts_index() || index_schema->is_multivalue_index()); + true : (index_schema->is_vec_index() || index_schema->is_fts_index() || index_schema->is_multivalue_index()); bool ignore = false; if (!arg.is_inner_ || !is_domain_index) { ignore = false; diff --git a/src/rootserver/ob_index_builder.h b/src/rootserver/ob_index_builder.h index 6df6645565..033af76419 100644 --- a/src/rootserver/ob_index_builder.h +++ b/src/rootserver/ob_index_builder.h @@ -98,8 +98,27 @@ public: const uint64_t tenant_data_version, common::ObIAllocator &allocator, ObDDLTaskRecord &task_record); + int submit_rebuild_index_task(common::ObMySQLTransaction &trans, + const obrpc::ObRebuildIndexArg &arg, + const share::schema::ObTableSchema *data_schema, + const common::ObIArray *inc_data_tablet_ids, + const common::ObIArray *del_data_tablet_ids, + const share::schema::ObTableSchema *index_schema, + const int64_t parallelism, + const int64_t group_id, + const uint64_t tenant_data_version, + common::ObIAllocator &allocator, + ObDDLTaskRecord &task_record); private: - int recognize_index_schemas( + int recognize_vec_index_schemas( + const common::ObIArray &index_schemas, + const bool is_vec_inner_drop, + int64_t &index_ith, + int64_t &rowkey_vid_ith, + int64_t &vid_rowkey_ith, + int64_t &index_id_ith, + int64_t &snapshot_data_ith); + int recognize_fts_index_schemas( const common::ObIArray &index_schemas, int64_t &index_ith, int64_t &aux_doc_word_ith, @@ -116,11 +135,11 @@ private: share::schema::ObTableSchema &schema); bool is_final_index_status(const share::schema::ObIndexStatus index_status) const; - int check_has_fts_or_multivalue_index( + int check_has_none_shared_index_tables_for_fts_or_multivalue_or_vector_index_( const uint64_t tenant_id, const uint64_t data_table_id, share::schema::ObSchemaGetterGuard &schema_guard, - bool &has_fts_or_multivalue_index); + bool &has_fts_or_multivalue_or_vector_index); bool ignore_error_code_for_domain_index( const int ret, const 
obrpc::ObDropIndexArg &arg, diff --git a/src/rootserver/ob_root_service.cpp b/src/rootserver/ob_root_service.cpp index 1291a72a2c..4d5eb0923e 100755 --- a/src/rootserver/ob_root_service.cpp +++ b/src/rootserver/ob_root_service.cpp @@ -4186,6 +4186,12 @@ int ObRootService::execute_ddl_task(const obrpc::ObAlterTableArg &arg, } break; } + case share::SWITCH_VEC_INDEX_NAME_TASK: { + if (OB_FAIL(ddl_service_.switch_index_name_and_status_for_vec_index_table(const_cast(arg)))) { + LOG_WARN("make recovert restore task visible failed", K(ret), K(arg)); + } + break; + } default: ret = OB_ERR_UNEXPECTED; LOG_WARN("unknown ddl task type", K(ret), K(arg.ddl_task_type_)); @@ -4674,15 +4680,15 @@ int ObRootService::exchange_partition(const obrpc::ObExchangePartitionArg &arg, return ret; } -int ObRootService::generate_aux_index_schema( - const ObGenerateAuxIndexSchemaArg &arg, - ObGenerateAuxIndexSchemaRes &result) +int ObRootService::create_aux_index( + const ObCreateAuxIndexArg &arg, + ObCreateAuxIndexRes &result) { int ret = OB_SUCCESS; if (!arg.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(arg)); - } else if (OB_FAIL(ddl_service_.generate_aux_index_schema(arg, result))) { + } else if (OB_FAIL(ddl_service_.create_aux_index(arg, result))) { LOG_WARN("failed to generate aux index schema", K(ret), K(arg), K(result)); } LOG_INFO("finish generate aux index schema", K(ret), K(arg), K(result), "ddl_event_info", ObDDLEventInfo()); @@ -4986,9 +4992,24 @@ int ObRootService::drop_index(const obrpc::ObDropIndexArg &arg, obrpc::ObDropInd int ObRootService::rebuild_vec_index(const obrpc::ObRebuildIndexArg &arg, obrpc::ObAlterTableRes &res) { - int ret = OB_NOT_SUPPORTED; - UNUSED(arg); - UNUSED(res); + int ret = OB_SUCCESS; + if (!inited_) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (!arg.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(arg)); + } else if (OB_FAIL(ddl_service_.rebuild_vec_index(arg, res))) 
{ + LOG_WARN("ddl_service rebuild index failed", K(arg), K(ret)); + } + ROOTSERVICE_EVENT_ADD("ddl scheduler", "rebuild index", + "tenant_id", arg.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_id", arg.index_table_id_, + "schema_version", res.schema_version_); + LOG_INFO("finish rebuild index ddl", K(ret), K(arg), K(res), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -10452,6 +10473,8 @@ int ObRootService::set_config_pre_hook(obrpc::ObAdminSetConfigArg &arg) ret = check_tx_share_memory_limit_(*item); } else if (0 == STRCMP(item->name_.ptr(), MEMSTORE_LIMIT_PERCENTAGE)) { ret = check_memstore_limit_(*item); + } else if (0 == STRCMP(item->name_.ptr(), OB_VECTOR_MEMORY_LIMIT_PERCENTAGE)) { + ret = check_vector_memory_limit_(*item); } else if (0 == STRCMP(item->name_.ptr(), DATA_DISK_WRITE_LIMIT_PERCENTAGE)) { ret = check_data_disk_write_limit_(*item); } else if (0 == STRCMP(item->name_.ptr(), DATA_DISK_USAGE_LIMIT_PERCENTAGE)) { @@ -10605,6 +10628,15 @@ int ObRootService::check_memstore_limit_(obrpc::ObAdminSetConfigItem &item) return ret; } +int ObRootService::check_vector_memory_limit_(obrpc::ObAdminSetConfigItem &item) +{ + int ret = OB_SUCCESS; + const char *warn_log = "ob_vector_limit_percentage. 
" + "It should be less than (85 - memstore_limit_percentage), check parameter 'memstore_limit_percentage' or '_memstore_limit_percentage'"; + CHECK_TENANTS_CONFIG_WITH_FUNC(ObConfigVectorMemoryChecker, warn_log); + return ret; +} + int ObRootService::check_tenant_memstore_limit_(obrpc::ObAdminSetConfigItem &item) { int ret = OB_SUCCESS; diff --git a/src/rootserver/ob_root_service.h b/src/rootserver/ob_root_service.h index 4bcfdd8cfa..d0cde4049e 100644 --- a/src/rootserver/ob_root_service.h +++ b/src/rootserver/ob_root_service.h @@ -519,9 +519,9 @@ public: int truncate_table(const obrpc::ObTruncateTableArg &arg, obrpc::ObDDLRes &res); int truncate_table_v2(const obrpc::ObTruncateTableArg &arg, obrpc::ObDDLRes &res); int exchange_partition(const obrpc::ObExchangePartitionArg &arg, obrpc::ObAlterTableRes &res); - int generate_aux_index_schema( - const obrpc::ObGenerateAuxIndexSchemaArg &arg, - obrpc::ObGenerateAuxIndexSchemaRes &result); + int create_aux_index( + const obrpc::ObCreateAuxIndexArg &arg, + obrpc::ObCreateAuxIndexRes &result); int create_index(const obrpc::ObCreateIndexArg &arg, obrpc::ObAlterTableRes &res); int drop_table(const obrpc::ObDropTableArg &arg, obrpc::ObDDLRes &res); int drop_database(const obrpc::ObDropDatabaseArg &arg, obrpc::ObDropDatabaseRes &drop_database_res); @@ -965,6 +965,7 @@ private: int add_rs_event_for_alter_ls_replica_(const obrpc::ObAdminAlterLSReplicaArg &arg, const int ret_val); int check_data_disk_write_limit_(obrpc::ObAdminSetConfigItem &item); int check_data_disk_usage_limit_(obrpc::ObAdminSetConfigItem &item); + int check_vector_memory_limit_(obrpc::ObAdminSetConfigItem &item); private: static const int64_t OB_MAX_CLUSTER_REPLICA_COUNT = 10000000; static const int64_t OB_ROOT_SERVICE_START_FAIL_COUNT_UPPER_LIMIT = 5; diff --git a/src/rootserver/ob_rs_rpc_processor.h b/src/rootserver/ob_rs_rpc_processor.h index 8f305165b4..e745d0102a 100644 --- a/src/rootserver/ob_rs_rpc_processor.h +++ 
b/src/rootserver/ob_rs_rpc_processor.h @@ -341,7 +341,7 @@ DEFINE_DDL_RS_RPC_PROCESSOR(obrpc::OB_DROP_TABLE, ObRpcDropTableP, drop_table(ar DEFINE_DDL_RS_RPC_PROCESSOR(obrpc::OB_RENAME_TABLE, ObRpcRenameTableP, rename_table(arg_)); DEFINE_DDL_RS_RPC_PROCESSOR(obrpc::OB_TRUNCATE_TABLE, ObRpcTruncateTableP, truncate_table(arg_, result_)); DEFINE_DDL_RS_RPC_PROCESSOR(obrpc::OB_TRUNCATE_TABLE_V2, ObRpcTruncateTableV2P, truncate_table_v2(arg_, result_)); -DEFINE_DDL_RS_RPC_PROCESSOR(obrpc::OB_GENERATE_AUX_INDEX_SCHEMA, ObRpcGenerateAuxIndexSchemaP, generate_aux_index_schema(arg_, result_)); +DEFINE_DDL_RS_RPC_PROCESSOR(obrpc::OB_CREATE_AUX_INDEX, ObRpcCreateAuxIndexP, create_aux_index(arg_, result_)); DEFINE_DDL_RS_RPC_PROCESSOR(obrpc::OB_CREATE_INDEX, ObRpcCreateIndexP, create_index(arg_, result_)); DEFINE_DDL_RS_RPC_PROCESSOR(obrpc::OB_DROP_INDEX, ObRpcDropIndexP, drop_index(arg_, result_)); DEFINE_DDL_RS_RPC_PROCESSOR(obrpc::OB_REBUILD_VEC_INDEX, ObRpcRebuildVecIndexP, rebuild_vec_index(arg_, result_)); diff --git a/src/share/CMakeLists.txt b/src/share/CMakeLists.txt index 920f424efc..fd673bcd9d 100644 --- a/src/share/CMakeLists.txt +++ b/src/share/CMakeLists.txt @@ -195,6 +195,8 @@ ob_set_subtarget(ob_share common ob_throttling_utils.cpp ob_storage_ha_diagnose_struct.cpp ob_storage_ha_diagnose_operator.cpp + ob_vec_index_builder_util.cpp + ob_domain_index_builder_util.cpp ob_service_name_proxy.cpp ob_compatibility_control.cpp ) @@ -573,6 +575,24 @@ ob_set_subtarget(ob_share vector vector/ob_continuous_base.cpp ) +ob_set_subtarget(ob_share vector_index + vector_index/ob_plugin_vector_index_adaptor.cpp + vector_index/ob_plugin_vector_index_util.cpp + vector_index/ob_plugin_vector_index_scheduler.cpp + vector_index/ob_plugin_vector_index_service.cpp + vector_index/ob_plugin_vector_index_serialize.cpp + vector_index/ob_plugin_vector_index_utils.cpp + vector_index/ob_vector_index_util.cpp +) + +ob_set_subtarget(ob_share vector_type + vector_type/ob_vector_l2_distance.cpp 
+ vector_type/ob_vector_ip_distance.cpp + vector_type/ob_vector_cosine_distance.cpp + vector_type/ob_vector_l1_distance.cpp + vector_type/ob_vector_norm.cpp +) + ob_add_new_object_target(ob_share ob_share) target_compile_options(ob_share PRIVATE) @@ -584,4 +604,4 @@ add_library(ob_share_static EXCLUDE_FROM_ALL) target_link_libraries(ob_share_static - PUBLIC ob_share) + PUBLIC ob_share) \ No newline at end of file diff --git a/src/share/aggregate/agg_ctx.h b/src/share/aggregate/agg_ctx.h index 227193808b..45e00791fc 100644 --- a/src/share/aggregate/agg_ctx.h +++ b/src/share/aggregate/agg_ctx.h @@ -321,6 +321,7 @@ struct RuntimeContext MEMCPY(agg_cell, src, data_len); } } + void reuse() { agg_rows_.reuse(); diff --git a/src/share/aggregate/first_row.h b/src/share/aggregate/first_row.h index bd5bcbfe16..07429188c7 100644 --- a/src/share/aggregate/first_row.h +++ b/src/share/aggregate/first_row.h @@ -14,6 +14,7 @@ #define OCEANBASE_SHARE_AGGREGATE_FIRST_ROW_H_ #include "iaggregate.h" +#include "sql/engine/expr/ob_array_expr_utils.h" namespace oceanbase { @@ -156,7 +157,12 @@ public: if (OB_LIKELY(not_nulls.at(agg_col_id) && agg_cell_len != INT32_MAX)) { const char *payload = (const char *)(*reinterpret_cast(agg_cell)); char *res_buf = nullptr; - if (is_discrete_vec(vec_tc)) { + if (agg_expr.is_nested_expr() && !is_uniform_format(res_vec->get_format())) { + ObString nested_data(agg_cell_len, payload); + if (OB_FAIL(ObArrayExprUtils::dispatch_array_attrs(ctx, const_cast(agg_expr), nested_data, output_idx))) { + LOG_WARN("fail to do nested expr from rows", K(ret)); + } + } else if (is_discrete_vec(vec_tc)) { // implicit aggr expr may be shared between operators and its // data is shallow copied for variable-length types while do backup/restore operations. // Hence child op's data is unexpected modified if deep copy happened here. 
@@ -169,6 +175,9 @@ public: } } else { res_vec->set_null(output_idx); + if (agg_expr.is_nested_expr() && !is_uniform_format(res_vec->get_format())) { + ObArrayExprUtils::set_expr_attrs_null(agg_expr, ctx, output_idx); + } } return ret; } diff --git a/src/share/aggregate/iaggregate.h b/src/share/aggregate/iaggregate.h index 5edb405e79..89b6178175 100644 --- a/src/share/aggregate/iaggregate.h +++ b/src/share/aggregate/iaggregate.h @@ -215,6 +215,23 @@ public: return ret; } + inline int get_nested_expr_vec(RuntimeContext &agg_ctx, const ObExpr *param_expr, ObIVector *¶m_vec) + { + int ret = OB_SUCCESS; + ObEvalCtx &eval_ctx = agg_ctx.eval_ctx_; + param_vec = param_expr->get_vector(eval_ctx); + VectorFormat fmt = param_expr->get_format(eval_ctx); + if (param_expr->is_nested_expr()) { + if (param_expr->attrs_cnt_ != 3) { // only vector type supported + ret = OB_ERR_UNEXPECTED; + SQL_LOG(WARN, "unexpected attrs_cnt_", K(param_expr->attrs_cnt_)); + } else if (fmt == common::VEC_DISCRETE || fmt == common::VEC_CONTINUOUS) { + param_vec = param_expr->attrs_[2]->get_vector(eval_ctx); + } + } + return ret; + } + inline int add_batch_for_multi_groups(RuntimeContext &agg_ctx, AggrRowPtr *agg_rows, RowSelector &row_sel, const int64_t batch_size, const int32_t agg_col_id) override @@ -230,6 +247,7 @@ public: ObEvalCtx &eval_ctx = agg_ctx.eval_ctx_; VectorFormat fmt = VEC_INVALID; ObExpr *param_expr = nullptr; + ObIVector *param_vec = nullptr; Derived *derived_this = static_cast(this); #ifndef NDEBUG int64_t mock_skip_data = 0; @@ -253,27 +271,29 @@ public: SQL_LOG(WARN, "inner add one row failed", K(ret)); } } + } else if (OB_FAIL(get_nested_expr_vec(agg_ctx, param_expr, param_vec))) { + SQL_LOG(WARN, "get nested expr vec failed", K(ret)); } else { VecValueTypeClass vec_tc = param_expr->get_vec_value_tc(); switch(fmt) { case common::VEC_UNIFORM: { ret = inner_add_for_multi_groups>( - agg_ctx, agg_rows, row_sel, batch_size, agg_col_id, param_expr->get_vector(eval_ctx)); + agg_ctx, 
agg_rows, row_sel, batch_size, agg_col_id, param_vec); break; } case common::VEC_UNIFORM_CONST: { ret = inner_add_for_multi_groups>( - agg_ctx, agg_rows, row_sel, batch_size, agg_col_id, param_expr->get_vector(eval_ctx)); + agg_ctx, agg_rows, row_sel, batch_size, agg_col_id, param_vec); break; } case common::VEC_DISCRETE: { ret = inner_add_for_multi_groups( - agg_ctx, agg_rows, row_sel, batch_size, agg_col_id, param_expr->get_vector(eval_ctx)); + agg_ctx, agg_rows, row_sel, batch_size, agg_col_id, param_vec); break; } case common::VEC_CONTINUOUS: { ret = inner_add_for_multi_groups( - agg_ctx, agg_rows, row_sel, batch_size, agg_col_id, param_expr->get_vector(eval_ctx)); + agg_ctx, agg_rows, row_sel, batch_size, agg_col_id, param_vec); break; } case common::VEC_FIXED: { @@ -446,17 +466,22 @@ protected: { int ret = OB_SUCCESS; ObEvalCtx &ctx = agg_ctx.eval_ctx_; - ColumnFmt &columns = *static_cast(param_expr.get_vector(ctx)); - bool all_not_null = !columns.has_null(); + ColumnFmt *columns = nullptr; + ObIVector *ivec = nullptr; + bool all_not_null = false; Derived &derived = *static_cast(this); void *tmp_res = derived.get_tmp_res(agg_ctx, agg_col_id, agg_cell); int64_t calc_info = derived.get_batch_calc_info(agg_ctx, agg_col_id, agg_cell); - if (OB_LIKELY(!agg_ctx.removal_info_.enable_removal_opt_)) { + if (OB_FAIL(get_nested_expr_vec(agg_ctx, ¶m_expr, ivec))) { + SQL_LOG(WARN, "get nested expr vec failed", K(ret)); + } else if (FALSE_IT(columns = static_cast(ivec))) { + } else if (FALSE_IT(all_not_null = !columns->has_null())) { + } else if (OB_LIKELY(!agg_ctx.removal_info_.enable_removal_opt_)) { if (OB_LIKELY(row_sel.is_empty() && bound.get_all_rows_active())) { if (all_not_null) { for (int i = bound.start(); OB_SUCC(ret) && i < bound.end(); i++) { if (OB_FAIL( - derived.add_row(agg_ctx, columns, i, agg_col_id, agg_cell, tmp_res, calc_info))) { + derived.add_row(agg_ctx, *columns, i, agg_col_id, agg_cell, tmp_res, calc_info))) { SQL_LOG(WARN, "add row failed", 
K(ret)); } } // end for @@ -466,7 +491,7 @@ protected: } } else { for (int i = bound.start(); OB_SUCC(ret) && i < bound.end(); i++) { - if (OB_FAIL(derived.add_nullable_row(agg_ctx, columns, i, agg_col_id, agg_cell, tmp_res, + if (OB_FAIL(derived.add_nullable_row(agg_ctx, *columns, i, agg_col_id, agg_cell, tmp_res, calc_info))) { SQL_LOG(WARN, "add row failed", K(ret)); } @@ -475,7 +500,7 @@ protected: } else if (!row_sel.is_empty()) { if (all_not_null) { for (int i = 0; OB_SUCC(ret) && i < row_sel.size(); i++) { - if (OB_FAIL(derived.add_row(agg_ctx, columns, row_sel.index(i), agg_col_id, agg_cell, + if (OB_FAIL(derived.add_row(agg_ctx, *columns, row_sel.index(i), agg_col_id, agg_cell, tmp_res, calc_info))) { SQL_LOG(WARN, "add row failed", K(ret)); } @@ -486,7 +511,7 @@ protected: } } else { for (int i = 0; OB_SUCC(ret) && i < row_sel.size(); i++) { - if (OB_FAIL(derived.add_nullable_row(agg_ctx, columns, row_sel.index(i), agg_col_id, + if (OB_FAIL(derived.add_nullable_row(agg_ctx, *columns, row_sel.index(i), agg_col_id, agg_cell, tmp_res, calc_info))) { SQL_LOG(WARN, "add row failed", K(ret)); } @@ -496,7 +521,7 @@ protected: if (all_not_null) { for (int i = bound.start(); OB_SUCC(ret) && i < bound.end(); i++) { if (skip.at(i)) { - } else if (OB_FAIL(derived.add_row(agg_ctx, columns, i, agg_col_id, agg_cell, tmp_res, + } else if (OB_FAIL(derived.add_row(agg_ctx, *columns, i, agg_col_id, agg_cell, tmp_res, calc_info))) { SQL_LOG(WARN, "add row failed", K(ret)); } @@ -508,7 +533,7 @@ protected: } else { for (int i = bound.start(); OB_SUCC(ret) && i < bound.end(); i++) { if (skip.at(i)) { - } else if (OB_FAIL(derived.add_nullable_row(agg_ctx, columns, i, agg_col_id, agg_cell, + } else if (OB_FAIL(derived.add_nullable_row(agg_ctx, *columns, i, agg_col_id, agg_cell, tmp_res, calc_info))) { SQL_LOG(WARN, "add row failed", K(ret)); } @@ -519,7 +544,7 @@ protected: if (row_sel.is_empty()) { if (bound.get_all_rows_active()) { for (int i = bound.start(); OB_SUCC(ret) && i 
< bound.end(); i++) { - ret = removal_opt::add_or_sub_row(derived, agg_ctx, columns, i, agg_col_id, agg_cell, + ret = removal_opt::add_or_sub_row(derived, agg_ctx, *columns, i, agg_col_id, agg_cell, tmp_res, calc_info); if (OB_FAIL(ret)) { SQL_LOG(WARN, "add or sub row failed", K(ret)); } } @@ -527,7 +552,7 @@ protected: for (int i = bound.start(); OB_SUCC(ret) && i < bound.end(); i++) { if (skip.at(i)) { } else { - ret = removal_opt::add_or_sub_row(derived, agg_ctx, columns, i, agg_col_id, agg_cell, + ret = removal_opt::add_or_sub_row(derived, agg_ctx, *columns, i, agg_col_id, agg_cell, tmp_res, calc_info); if (OB_FAIL(ret)) { SQL_LOG(WARN, "add or sub row failed", K(ret)); } } @@ -535,7 +560,7 @@ protected: } } else { for (int i = 0; OB_SUCC(ret) && i < row_sel.size(); i++) { - ret = removal_opt::add_or_sub_row(derived, agg_ctx, columns, i, agg_col_id, agg_cell, + ret = removal_opt::add_or_sub_row(derived, agg_ctx, *columns, row_sel.index(i), agg_col_id, agg_cell, tmp_res, calc_info); if (OB_FAIL(ret)) { SQL_LOG(WARN, "add or sub row failed", K(ret)); } } @@ -978,6 +1003,7 @@ inline constexpr bool is_var_len_agg_cell(VecValueTypeClass vec_tc) || vec_tc == VEC_TC_JSON || vec_tc == VEC_TC_GEO || vec_tc == VEC_TC_UDT + || vec_tc == VEC_TC_COLLECTION || vec_tc == VEC_TC_ROARINGBITMAP || vec_tc == VEC_TC_EXTEND; } diff --git a/src/share/aggregate/single_row.cpp b/src/share/aggregate/single_row.cpp index a0beb6ca87..51e04f2b86 100644 --- a/src/share/aggregate/single_row.cpp +++ b/src/share/aggregate/single_row.cpp @@ -244,6 +244,15 @@ static int init_single_row_sum_agg(VecValueTypeClass in_tc, VecValueTypeClass ou } break; } + case VEC_TC_COLLECTION: { + if (out_tc != VEC_TC_COLLECTION) { + ret = OB_ERR_UNEXPECTED; + } else { + ret = init_agg_func>( + agg_ctx, i, allocator, agg); + } + break; + } default: { ret = OB_ERR_UNEXPECTED; break; diff --git a/src/share/aggregate/sum.cpp b/src/share/aggregate/sum.cpp index 80cc75a00f..9fc88ddae8 100644 --- a/src/share/aggregate/sum.cpp +++ 
b/src/share/aggregate/sum.cpp @@ -250,6 +250,20 @@ int init_sum_aggregate(RuntimeContext &agg_ctx, const int64_t agg_col_id, } break; } + case VEC_TC_COLLECTION: { + if (out_tc != VEC_TC_COLLECTION) { + ret = OB_ERR_UNEXPECTED; + SQL_LOG(WARN, "unexpected in & out type class", K(in_tc), K(out_tc)); + } else { + if (tmp_res_size != nullptr) { + *tmp_res_size = 0; + } else { + ret = init_agg_func(agg_ctx, agg_col_id, aggr_info.has_distinct_, + allocator, agg); + } + } + break; + } default: { ret = OB_ERR_UNEXPECTED; SQL_LOG(WARN, "unexpected in & out type class", K(in_tc), K(out_tc)); diff --git a/src/share/aggregate/sum.h b/src/share/aggregate/sum.h index 09a4aa4be0..1bf13a86b5 100644 --- a/src/share/aggregate/sum.h +++ b/src/share/aggregate/sum.h @@ -14,6 +14,8 @@ #define OCEANBASE_SHARE_AGGREGATE_SUM_H_ #include "share/aggregate/iaggregate.h" +#include "sql/engine/expr/ob_expr_lob_utils.h" +#include "sql/engine/expr/ob_array_expr_utils.h" #include namespace oceanbase @@ -750,6 +752,269 @@ private: } }; +class SumVectorAggregate final: public BatchAggregateWrapper +{ +public: + static const constexpr VecValueTypeClass IN_TC = VEC_TC_COLLECTION; + static const constexpr VecValueTypeClass OUT_TC = VEC_TC_COLLECTION; + +public: + SumVectorAggregate() {} + + inline int add_one_row(RuntimeContext &agg_ctx, int64_t batch_idx, int64_t batch_size, + const bool is_null, const char *data, const int32_t data_len, + int32_t agg_col_idx, char *agg_cell) + { + int ret = OB_SUCCESS; + ObAggrInfo &aggr_info = agg_ctx.aggr_infos_.at(agg_col_idx); + ObEvalCtx &eval_ctx = agg_ctx.eval_ctx_; + VectorFormat fmt = aggr_info.param_exprs_.at(0)->get_format(eval_ctx); + NotNullBitVector ¬_nulls = agg_ctx.locate_notnulls_bitmap(agg_col_idx, agg_cell); + if (OB_LIKELY(!is_null)) { + if (not_nulls.at(agg_col_idx)) { + ObString array_data(data_len, data); + if (fmt == VEC_UNIFORM || fmt == VEC_UNIFORM_CONST) { + ObLobCommon *lob_comm = (ObLobCommon*)(data); + if (!lob_comm->is_valid()) { + ret 
= OB_ERR_UNEXPECTED; + SQL_LOG(WARN, "unexpected data", K(ret), K(*lob_comm)); + } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(&agg_ctx.allocator_, + ObLongTextType, + CS_TYPE_BINARY, + true, + array_data))) { + SQL_LOG(WARN, "fail to get real data.", K(ret), K(array_data)); + } + } + if (OB_SUCC(ret)) { + int32_t agg_cell_len = *reinterpret_cast(agg_cell + sizeof(char *)); + const char *agg_data = reinterpret_cast(*reinterpret_cast(agg_cell)); + ObLobCommon *agg_lob_comm = (ObLobCommon*)(agg_data); + ObString agg_array_data(agg_cell_len, agg_data); + if (!agg_lob_comm->is_valid()) { + ret = OB_ERR_UNEXPECTED; + SQL_LOG(WARN, "unexpected data", K(ret), K(*agg_lob_comm)); + } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(&agg_ctx.allocator_, + ObLongTextType, + CS_TYPE_BINARY, + true, + agg_array_data))) { + SQL_LOG(WARN, "fail to get real data.", K(ret), K(agg_array_data)); + } else if (array_data.length() != agg_array_data.length()) { + ret = OB_ERR_UNEXPECTED; + SQL_LOG(WARN, "unexpect length", K(ret), K(agg_array_data), K(array_data)); + } else { + // update in-place + int64_t length = array_data.length() / sizeof(float); + float *float_data = reinterpret_cast(array_data.ptr()); + float *float_res = reinterpret_cast(agg_array_data.ptr()); + for (int64_t i = 0; OB_SUCC(ret) && i < length; ++i) { + float_res[i] += float_data[i]; + if (isinff(float_res[i]) != 0) { + ret = OB_OPERATE_OVERFLOW; + SQL_LOG(WARN, "value overflow", K(ret), K(i), K(float_data[i]), K(float_res[i])); + } + } + } + } + } else { + ObString res; + if (fmt == VEC_UNIFORM || fmt == VEC_UNIFORM_CONST) { + char *res_ptr = nullptr; + if (OB_ISNULL(res_ptr = (char*)agg_ctx.allocator_.alloc(data_len))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + SQL_LOG(WARN, "failed to allocator memory", K(ret)); + } else { + MEMCPY(res_ptr, data, data_len); + res.assign(res_ptr, data_len); + } + } else if (OB_FAIL(ObArrayExprUtils::set_array_res(nullptr, data_len, agg_ctx.allocator_, res, 
data))) { + SQL_LOG(WARN, "failed to set array res", K(ret)); + } + if (OB_SUCC(ret)) { + *reinterpret_cast(agg_cell) = reinterpret_cast(res.ptr()); + *reinterpret_cast(agg_cell + sizeof(char *)) = res.length(); + } + } + if (OB_SUCC(ret)) { + not_nulls.set(agg_col_idx); + } + } + return ret; + } + + template + inline int inner_add_or_sub_row(RuntimeContext &agg_ctx, ColumnFmt &columns, const int32_t row_num, + const int32_t agg_col_id, char *aggr_cell, bool is_add) + { + int ret = OB_SUCCESS; + VectorFormat fmt = columns.get_format(); + NotNullBitVector ¬_nulls = agg_ctx.locate_notnulls_bitmap(agg_col_id, aggr_cell); + const char* param_payload = nullptr; + int32_t param_len = 0; + columns.get_payload(row_num, param_payload, param_len); + if (not_nulls.at(agg_col_id)) { + ObString array_data(param_len, param_payload); + if (fmt == VEC_UNIFORM || fmt == VEC_UNIFORM_CONST) { + ObLobCommon *lob_comm = (ObLobCommon*)(param_payload); + if (!lob_comm->is_valid()) { + ret = OB_ERR_UNEXPECTED; + SQL_LOG(WARN, "unexpected data", K(ret), K(*lob_comm)); + } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(&agg_ctx.allocator_, + ObLongTextType, + CS_TYPE_BINARY, + true, + array_data))) { + SQL_LOG(WARN, "fail to get real data.", K(ret), K(array_data)); + } + } + if (OB_SUCC(ret)) { + int32_t agg_cell_len = *reinterpret_cast(aggr_cell + sizeof(char *)); + const char *agg_data = reinterpret_cast(*reinterpret_cast(aggr_cell)); + ObLobCommon *agg_lob_comm = (ObLobCommon*)(agg_data); + ObString agg_array_data(agg_cell_len, agg_data); + if (!agg_lob_comm->is_valid()) { + ret = OB_ERR_UNEXPECTED; + SQL_LOG(WARN, "unexpected data", K(ret), K(*agg_lob_comm)); + } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(&agg_ctx.allocator_, + ObLongTextType, + CS_TYPE_BINARY, + true, + agg_array_data))) { + SQL_LOG(WARN, "fail to get real data.", K(ret), K(agg_array_data)); + } else if (array_data.length() != agg_array_data.length()) { + ret = OB_ERR_UNEXPECTED; + 
SQL_LOG(WARN, "unexpect length", K(ret), K(agg_array_data), K(array_data)); + } else { + // update in-place + int64_t length = array_data.length() / sizeof(float); + float *float_data = reinterpret_cast(array_data.ptr()); + float *float_res = reinterpret_cast(agg_array_data.ptr()); + for (int64_t i = 0; OB_SUCC(ret) && i < length; ++i) { + is_add ? float_res[i] += float_data[i] : float_res[i] -= float_data[i]; + if (isinff(float_res[i]) != 0) { + ret = OB_OPERATE_OVERFLOW; + SQL_LOG(WARN, "value overflow", K(ret), K(i), K(float_data[i]), K(float_res[i])); + } + } + } + } + } else if (is_add) { + ObString res; + if (fmt == VEC_UNIFORM || fmt == VEC_UNIFORM_CONST) { + char *res_ptr = nullptr; + if (OB_ISNULL(res_ptr = (char*)agg_ctx.allocator_.alloc(param_len))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + SQL_LOG(WARN, "failed to allocator memory", K(ret)); + } else { + MEMCPY(res_ptr, param_payload, param_len); + res.assign(res_ptr, param_len); + } + } else if (OB_FAIL(ObArrayExprUtils::set_array_res(nullptr, param_len, agg_ctx.allocator_, res, param_payload))) { + SQL_LOG(WARN, "failed to set array res", K(ret)); + } + if (OB_SUCC(ret)) { + *reinterpret_cast(aggr_cell) = reinterpret_cast(res.ptr()); + *reinterpret_cast(aggr_cell + sizeof(char *)) = res.length(); + not_nulls.set(agg_col_id); + } + } else { + ret = OB_ERR_UNEXPECTED; + SQL_LOG(WARN, "unexpected null agg_ecll", K(ret), K(is_add)); + } + return ret; + } + + template + inline int add_row(RuntimeContext &agg_ctx, ColumnFmt &columns, const int32_t row_num, + const int32_t agg_col_id, char *aggr_cell, void *tmp_res, int64_t &calc_info) + { + UNUSED(tmp_res); + return inner_add_or_sub_row(agg_ctx, columns, row_num, agg_col_id, aggr_cell, true/*is_add*/); + } + + template + inline int add_nullable_row(RuntimeContext &agg_ctx, ColumnFmt &columns, const int32_t row_num, + const int32_t agg_col_id, char *agg_cell, void *tmp_res, + int64_t &calc_info) + { + int ret = OB_SUCCESS; + if (columns.is_null(row_num)) { + 
SQL_LOG(DEBUG, "add null row", K(ret), K(row_num)); + } else if (OB_FAIL( + add_row(agg_ctx, columns, row_num, agg_col_id, agg_cell, tmp_res, calc_info))) { + SQL_LOG(WARN, "add row failed", K(ret)); + } else { + NotNullBitVector ¬_nulls = agg_ctx.locate_notnulls_bitmap(agg_col_id, agg_cell); + not_nulls.set(agg_col_id); + } + return ret; + } + + template + inline int sub_row(RuntimeContext &agg_ctx, ColumnFmt &columns, const int32_t row_num, + const int32_t agg_col_id, char *aggr_cell, void *tmp_res, int64_t &calc_info) + { + UNUSED(tmp_res); + return inner_add_or_sub_row(agg_ctx, columns, row_num, agg_col_id, aggr_cell, false/*is_add*/); + } + + template + OB_INLINE int add_or_sub_row(RuntimeContext &agg_ctx, ColumnFmt &columns, const int32_t row_num, + const int32_t agg_col_id, char *agg_cell, void *tmp_res, int64_t &calc_info) + { + int ret = OB_SUCCESS; + bool is_trans = !agg_ctx.removal_info_.is_inverse_agg_; + if (!columns.is_null(row_num)) { + if (is_trans) { + if (OB_FAIL(add_row(agg_ctx, columns, row_num, agg_col_id, agg_cell, tmp_res, calc_info))) { + SQL_LOG(WARN, "add row failed", K(ret)); + } + } else if (OB_FAIL( + sub_row(agg_ctx, columns, row_num, agg_col_id, agg_cell, tmp_res, calc_info))) { + SQL_LOG(WARN, "sub row failed", K(ret)); + } + } else { + if (is_trans) { + agg_ctx.removal_info_.null_cnt_++; + } else { + agg_ctx.removal_info_.null_cnt_--; + } + } + return ret; + } + + template + int collect_group_result(RuntimeContext &agg_ctx, const sql::ObExpr &agg_expr, + const int32_t agg_col_id, const char *agg_cell, + const int32_t agg_cell_len) + { + int ret = OB_SUCCESS; + int64_t output_idx = agg_ctx.eval_ctx_.get_batch_idx(); + const NotNullBitVector ¬_nulls = agg_ctx.locate_notnulls_bitmap(agg_col_id, agg_cell); + ObIVector *output_vec = agg_expr.get_vector(agg_ctx.eval_ctx_); + const char *agg_data = reinterpret_cast(*reinterpret_cast(agg_cell)); + if (OB_LIKELY(not_nulls.at(agg_col_id))) { + ObString res_str(agg_cell_len, agg_data); + 
static_cast(output_vec)->set_string(output_idx, res_str); + } else { + static_cast(output_vec)->set_null(output_idx); + } + return ret; + } + + virtual int rollup_aggregation(RuntimeContext &agg_ctx, const int32_t agg_col_idx, + AggrRowPtr group_row, AggrRowPtr rollup_row, + int64_t cur_rollup_group_idx, + int64_t max_group_cnt = INT64_MIN) override + { + int ret = OB_NOT_SUPPORTED; + return ret; + } + + TO_STRING_KV("aggregate", "sum_vector"); +}; + } // end aggregate } // end share } // end oceanbase diff --git a/src/share/aggregate/util.h b/src/share/aggregate/util.h index 450fd584b2..30b0f35855 100644 --- a/src/share/aggregate/util.h +++ b/src/share/aggregate/util.h @@ -686,6 +686,7 @@ inline bool agg_res_not_null(const ObItemType agg_op) VEC_TC_DEC_INT128, \ VEC_TC_DEC_INT256, \ VEC_TC_DEC_INT512, \ + VEC_TC_COLLECTION, \ VEC_TC_ROARINGBITMAP } // end namespace aggregate diff --git a/src/share/config/ob_config_helper.cpp b/src/share/config/ob_config_helper.cpp index 278f7e9926..c0de06d5d2 100644 --- a/src/share/config/ob_config_helper.cpp +++ b/src/share/config/ob_config_helper.cpp @@ -34,6 +34,9 @@ #include "share/schema/ob_schema_struct.h" #include "share/ob_ddl_common.h" #include "share/backup/ob_archive_persist_helper.h" +#include "storage/tx_storage/ob_tenant_freezer.h" +#include "share/vector_index/ob_vector_index_util.h" + namespace oceanbase { using namespace share; @@ -176,12 +179,34 @@ bool less_or_equal_tx_share_limit(const uint64_t tenant_id, const int64_t value) return bool_ret; } +bool check_vector_memory_limit(const uint64_t tenant_id, const int64_t value) +{ + bool bool_ret = false; + int64_t vector_memory_limit = 0; + omt::ObTenantConfigGuard tenant_config(TENANT_CONF(tenant_id)); + if (tenant_config.is_valid()) { + vector_memory_limit = tenant_config->ob_vector_memory_limit_percentage; + if (0 == vector_memory_limit) { + // 0 is default value, which means vector index is disabled,do not need to check + bool_ret = true; + } else if (value + 15 
+ vector_memory_limit >= 100) { + bool_ret = false; + } else { + bool_ret = true; + } + } else { + bool_ret = false; + OB_LOG_RET(ERROR, OB_INVALID_CONFIG, "tenant config check_vector_memory_limit is invalid",K(value), K(vector_memory_limit), K(tenant_id)); + } + return bool_ret; +} bool ObConfigMemstoreLimitChecker::check(const uint64_t tenant_id, const obrpc::ObAdminSetConfigItem &t) { bool is_valid = false; int64_t value = ObConfigIntParser::get(t.value_.ptr(), is_valid); - if (less_or_equal_tx_share_limit(tenant_id, value)) { + if (less_or_equal_tx_share_limit(tenant_id, value) && + check_vector_memory_limit(tenant_id, value)) { is_valid = true; } else { is_valid = false; @@ -634,6 +659,29 @@ bool ObConfigTenantMemoryChecker::check(const ObConfigItem &t) const return is_valid; } +bool ObConfigVectorMemoryChecker::check(const uint64_t tenant_id, const obrpc::ObAdminSetConfigItem &t) +{ + bool is_valid = false; + int64_t value = ObConfigIntParser::get(t.value_.ptr(), is_valid); + int64_t cur_value = 0; + int64_t upper_limit = 0; + int ret = OB_SUCCESS; + if (is_valid) { + if (value == 0) { + is_valid = true; + } else if (OB_FAIL(ObPluginVectorIndexHelper::get_vector_memory_value_and_limit(tenant_id, cur_value, upper_limit))) { + OB_LOG_RET(ERROR, OB_INVALID_CONFIG, "fail to get_vector_memory_value_and_limit", K(tenant_id)); + } else if (0 < value && value < upper_limit) { + is_valid = true; + } else { + is_valid = false; + } + int64_t memory_size = 0; + ObPluginVectorIndexHelper::get_vector_memory_limit_size(tenant_id, memory_size); + } + return is_valid; +} + bool ObConfigQueryRateLimitChecker::check(const ObConfigItem &t) const { bool is_valid = false; diff --git a/src/share/config/ob_config_helper.h b/src/share/config/ob_config_helper.h index b6faf84b92..ae91d0b312 100644 --- a/src/share/config/ob_config_helper.h +++ b/src/share/config/ob_config_helper.h @@ -925,6 +925,16 @@ private: typedef __ObConfigContainer ObConfigContainer; + +class 
ObConfigVectorMemoryChecker +{ +public: + static bool check(const uint64_t tenant_id, const obrpc::ObAdminSetConfigItem &t); + +private: + DISALLOW_COPY_AND_ASSIGN(ObConfigVectorMemoryChecker); +}; + } // namespace common } // namespace oceanbase diff --git a/src/share/config/ob_server_config.h b/src/share/config/ob_server_config.h index 010154f2f8..5e6d867847 100644 --- a/src/share/config/ob_server_config.h +++ b/src/share/config/ob_server_config.h @@ -69,6 +69,7 @@ const char* const BALANCER_IDLE_TIME = "balancer_idle_time"; const char* const LOG_DISK_UTILIZATION_LIMIT_THRESHOLD = "log_disk_utilization_limit_threshold"; const char* const LOG_DISK_THROTTLING_PERCENTAGE = "log_disk_throttling_percentage"; const char* const ARCHIVE_LAG_TARGET = "archive_lag_target"; +const char* const OB_VECTOR_MEMORY_LIMIT_PERCENTAGE = "ob_vector_memory_limit_percentage"; class ObServerMemoryConfig; diff --git a/src/share/datum/ob_datum_cmp_func_def.h b/src/share/datum/ob_datum_cmp_func_def.h index e10d2bb4f9..557b85ee7c 100644 --- a/src/share/datum/ob_datum_cmp_func_def.h +++ b/src/share/datum/ob_datum_cmp_func_def.h @@ -433,6 +433,37 @@ struct ObDatumUDTCmp : public ObDefined<> } }; +template +struct ObDatumCollectionCmp : public ObDefined<> +{ + inline static int cmp(const ObDatum &l, const ObDatum &r, int &cmp_ret) + { + int ret = OB_SUCCESS; + cmp_ret = 0; + ObString l_data; + ObString r_data; + common::ObArenaAllocator allocator(ObModIds::OB_LOB_READER, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObTextStringIter l_instr_iter(ObGeometryType, CS_TYPE_BINARY, l.get_string(), HAS_LOB_HEADER); + ObTextStringIter r_instr_iter(ObGeometryType, CS_TYPE_BINARY, r.get_string(), HAS_LOB_HEADER); + if (OB_FAIL(l_instr_iter.init(0, NULL, &allocator))) { + COMMON_LOG(WARN, "Lob: init left lob str iter failed", K(ret), K(l)); + } else if (OB_FAIL(l_instr_iter.get_full_data(l_data))) { + COMMON_LOG(WARN, "Lob: get left lob str iter full data failed ", K(ret), K(l_instr_iter)); + } else if 
(OB_FAIL(r_instr_iter.init(0, NULL, &allocator))) { + COMMON_LOG(WARN, "Lob: init right lob str iter failed", K(ret), K(ret), K(r)); + } else if (OB_FAIL(r_instr_iter.get_full_data(r_data))) { + COMMON_LOG(WARN, "Lob: get right lob str iter full data failed ", K(ret), K(r_instr_iter)); + } else { + // only memcmp supported now + cmp_ret = MEMCMP(l_data.ptr(), r_data.ptr(), std::min(l_data.length(), r_data.length())); + if (cmp_ret == 0 && l_data.length() != r_data.length()) { + cmp_ret = l_data.length() > r_data.length() ? 1 : -1; + } + } + return ret; + } +}; + /////////////////////////////////////////////////////////////////////////////// // begin define string compare functions /////////////////////////////////////////////////////////////////////////////// diff --git a/src/share/datum/ob_datum_funcs.cpp b/src/share/datum/ob_datum_funcs.cpp index 0069e1d61b..88438ec85d 100644 --- a/src/share/datum/ob_datum_funcs.cpp +++ b/src/share/datum/ob_datum_funcs.cpp @@ -177,6 +177,24 @@ struct ObNullSafeDatumUDTCmp } }; +template +struct ObNullSafeDatumCollectionCmp +{ + inline static int cmp(const ObDatum &l, const ObDatum &r, int &cmp_ret) { + int ret = OB_SUCCESS; + if (OB_UNLIKELY(l.is_null()) && OB_UNLIKELY(r.is_null())) { + cmp_ret = 0; + } else if (OB_UNLIKELY(l.is_null())) { + cmp_ret = NULL_FIRST ? -1 : 1; + } else if (OB_UNLIKELY(r.is_null())) { + cmp_ret = NULL_FIRST ? 1 : -1; + } else { + ret = datum_cmp::ObDatumCollectionCmp::cmp(l, r, cmp_ret); + } + return ret; + } +}; + template struct ObNullSafeFixedDoubleCmp { @@ -372,6 +390,28 @@ struct InitGeoCmpArray bool g_geo_cmp_array_inited = ObArrayConstIniter<1, InitGeoCmpArray>::init(); + +static ObDatumCmpFuncType NULLSAFE_COLLECTION_CMP_FUNCS[2][2]; + +template +struct InitCollectionCmpArray +{ + template + using Cmp = ObNullSafeDatumCollectionCmp; + using Def = datum_cmp::ObDatumCollectionCmp; + + static void init_array() + { + auto &funcs = NULLSAFE_COLLECTION_CMP_FUNCS; + funcs[0][0] = Def::defined_ ? 
&Cmp<0, 0>::cmp : NULL; + funcs[0][1] = Def::defined_ ? &Cmp<0, 1>::cmp : NULL; + funcs[1][0] = Def::defined_ ? &Cmp<1, 0>::cmp : NULL; // slot is read as [null_pos_idx][has_lob_header] + funcs[1][1] = Def::defined_ ? &Cmp<1, 1>::cmp : NULL; + } +}; + +bool g_collection_cmp_array_inited = ObArrayConstIniter<1, InitCollectionCmpArray>::init(); + static ObDatumCmpFuncType FIXED_DOUBLE_CMP_FUNCS[OB_NOT_FIXED_SCALE][2]; template struct InitFixedDoubleCmpArray @@ -438,6 +478,8 @@ ObDatumCmpFuncType ObDatumFuncs::get_nullsafe_cmp_func( func_ptr = FIXED_DOUBLE_CMP_FUNCS[max_scale][null_pos_idx]; } else if (is_geometry(type1) && is_geometry(type2)) { func_ptr = NULLSAFE_GEO_CMP_FUNCS[null_pos_idx][has_lob_header]; + } else if (is_collection(type1) && is_collection(type2)) { + func_ptr = NULLSAFE_COLLECTION_CMP_FUNCS[null_pos_idx][has_lob_header]; } else if (ob_is_decimal_int(type1) && ob_is_decimal_int(type2) && prec1 != PRECISION_UNKNOWN_YET && prec2 != PRECISION_UNKNOWN_YET) { ObDecimalIntWideType lw = get_decimalint_type(prec1); @@ -471,6 +513,12 @@ bool ObDatumFuncs::is_geometry(const ObObjType type) return (tc == ObGeometryTC); } +bool ObDatumFuncs::is_collection(const ObObjType type) +{ + const ObObjTypeClass tc = OBJ_TYPE_TO_CLASS[type]; + return (tc == ObCollectionSQLTC); +} + /** * This function is primarily responsible for handling inconsistent hash computations * for null types and the null values of those types, such as string, float, double, etc. @@ -561,6 +609,20 @@ struct DatumGeoHashCalculator : public DefHashMethod } }; +template +struct DatumCollectionHashCalculator : public DefHashMethod +{ + static int calc_datum_hash(const ObDatum &datum, const uint64_t seed, uint64_t &res) + { + return datum_lob_locator_hash(datum, CS_TYPE_UTF8MB4_BIN, seed, T::is_varchar_hash ? T::hash : NULL, res); + } + + static int calc_datum_hash_v2(const ObDatum &datum, const uint64_t seed, uint64_t &res) + { + return datum_lob_locator_hash(datum, CS_TYPE_UTF8MB4_BIN, seed, T::is_varchar_hash ? 
T::hash : NULL, res); + } +}; + template struct DatumUDTHashCalculator : public DefHashMethod { @@ -1115,6 +1177,58 @@ struct InitBasicGeoFuncArray } }; +static ObExprBasicFuncs EXPR_BASIC_COLLECTION_FUNCS[2]; +template +struct InitBasicCollectionFuncArray +{ + template + using Hash = DefHashFunc>; + template + using TCCmp = ObNullSafeDatumTCCmp; + using TCDef = datum_cmp::ObDatumTCCmp; + template + using TypeCmp = ObNullSafeDatumCollectionCmp; + using TypeDef = datum_cmp::ObDatumCollectionCmp; + + static void init_array() + { + auto &basic_funcs = EXPR_BASIC_COLLECTION_FUNCS; + basic_funcs[0].default_hash_ = Hash::hash; + basic_funcs[0].default_hash_batch_= Hash::hash_batch; + basic_funcs[0].murmur_hash_ = Hash::hash; + basic_funcs[0].murmur_hash_batch_ = Hash::hash_batch; + basic_funcs[0].xx_hash_ = Hash::hash; + basic_funcs[0].xx_hash_batch_ = Hash::hash_batch; + basic_funcs[0].wy_hash_ = Hash::hash; + basic_funcs[0].wy_hash_batch_ = Hash::hash_batch; + basic_funcs[0].null_first_cmp_ = TypeDef::defined_ + ? &TypeCmp<1, 0>::cmp + : TCDef::defined_ ? &TCCmp<1>::cmp : NULL; + basic_funcs[0].null_last_cmp_ = TypeDef::defined_ + ? &TypeCmp<0, 0>::cmp + : TCDef::defined_ ? &TCCmp<0>::cmp : NULL; + basic_funcs[0].murmur_hash_v2_ = Hash::hash_v2; + basic_funcs[0].murmur_hash_v2_batch_ = Hash::hash_v2_batch; + + basic_funcs[1].default_hash_ = Hash::hash; + basic_funcs[1].default_hash_batch_= Hash::hash_batch; + basic_funcs[1].murmur_hash_ = Hash::hash; + basic_funcs[1].murmur_hash_batch_ = Hash::hash_batch; + basic_funcs[1].xx_hash_ = Hash::hash; + basic_funcs[1].xx_hash_batch_ = Hash::hash_batch; + basic_funcs[1].wy_hash_ = Hash::hash; + basic_funcs[1].wy_hash_batch_ = Hash::hash_batch; + basic_funcs[1].null_first_cmp_ = TypeDef::defined_ + ? &TypeCmp<1, 1>::cmp + : TCDef::defined_ ? &TCCmp<1>::cmp : NULL; + basic_funcs[1].null_last_cmp_ = TypeDef::defined_ + ? &TypeCmp<0, 1>::cmp + : TCDef::defined_ ? 
&TCCmp<0>::cmp : NULL; + basic_funcs[1].murmur_hash_v2_ = Hash::hash_v2; + basic_funcs[1].murmur_hash_v2_batch_ = Hash::hash_v2_batch; + } +}; + static ObExprBasicFuncs FIXED_DOUBLE_BASIC_FUNCS[OB_NOT_FIXED_SCALE]; template struct InitFixedDoubleBasicFuncArray @@ -1177,6 +1291,7 @@ bool g_basic_funcs_array_inited = ObArrayConstIniter::init(); bool g_basic_json_array_inited = ObArrayConstIniter<1, InitBasicJsonFuncArray>::init(); bool g_basic_geo_array_inited = ObArrayConstIniter<1, InitBasicGeoFuncArray>::init(); +bool g_basic_collection_array_inited = ObArrayConstIniter<1, InitBasicCollectionFuncArray>::init(); bool g_fixed_double_basic_func_array_inited = ObArrayConstIniter::init(); @@ -1247,7 +1362,7 @@ ObExprBasicFuncs* ObDatumFuncs::get_basic_func(const ObObjType type, } else if (ob_is_user_defined_sql_type(type)) { res = &EXPR_BASIC_UDT_FUNCS[0]; } else if (ob_is_collection_sql_type(type)) { - res = &EXPR_BASIC_STR_FUNCS[cs_type][false][has_lob_locator]; + res = &EXPR_BASIC_COLLECTION_FUNCS[has_lob_locator]; } else if (!is_oracle_mode && ob_is_double_type(type) && scale > SCALE_UNKNOWN_YET && scale < OB_NOT_FIXED_SCALE) { res = &FIXED_DOUBLE_BASIC_FUNCS[scale]; @@ -1331,6 +1446,11 @@ REG_SER_FUNC_ARRAY(OB_SFA_DATUM_NULLSAFE_GEO_CMP, NULLSAFE_GEO_CMP_FUNCS, sizeof(NULLSAFE_GEO_CMP_FUNCS) / sizeof(void*)); +static_assert(2 * 2 == sizeof(NULLSAFE_COLLECTION_CMP_FUNCS) / sizeof(void *), + "unexpected size"); +REG_SER_FUNC_ARRAY(OB_SFA_DATUM_NULLSAFE_COLLECTION_CMP, + NULLSAFE_COLLECTION_CMP_FUNCS, + sizeof(NULLSAFE_COLLECTION_CMP_FUNCS) / sizeof(void*)); static_assert(OB_NOT_FIXED_SCALE * 2 == sizeof(FIXED_DOUBLE_CMP_FUNCS) / sizeof(void *), "unexpected size"); @@ -1390,6 +1510,8 @@ static ExprBasicFuncSerPart2 EXPR_BASIC_DECINT_FUNCS_PART2[DECIMAL_INT_MAX]; static ExprBasicFuncSerPart1 EXPR_BASIC_UDT_FUNCS_PART1[1]; static ExprBasicFuncSerPart2 EXPR_BASIC_UDT_FUNCS_PART2[1]; +static ExprBasicFuncSerPart1 EXPR_BASIC_COLLECTION_FUNCS_PART1[2]; +static 
ExprBasicFuncSerPart2 EXPR_BASIC_COLLECTION_FUNCS_PART2[2]; bool split_basic_func_for_ser(void) { @@ -1424,6 +1546,10 @@ bool split_basic_func_for_ser(void) EXPR_BASIC_UDT_FUNCS_PART1[i].from(EXPR_BASIC_UDT_FUNCS[i]); EXPR_BASIC_UDT_FUNCS_PART2[i].from(EXPR_BASIC_UDT_FUNCS[i]); } + for (int64_t i = 0; i < sizeof(EXPR_BASIC_COLLECTION_FUNCS)/sizeof(ObExprBasicFuncs); i++) { + EXPR_BASIC_COLLECTION_FUNCS_PART1[i].from(EXPR_BASIC_COLLECTION_FUNCS[i]); + EXPR_BASIC_COLLECTION_FUNCS_PART2[i].from(EXPR_BASIC_COLLECTION_FUNCS[i]); + } return true; } bool g_split_basic_func_for_ser = split_basic_func_for_ser(); @@ -1494,5 +1620,14 @@ REG_SER_FUNC_ARRAY(OB_SFA_EXPR_UDT_BASIC_PART2, EXPR_BASIC_UDT_FUNCS_PART2, sizeof(EXPR_BASIC_UDT_FUNCS_PART2) / sizeof(void *)); +static_assert(2 * EXPR_BASIC_FUNC_MEMBER_CNT == sizeof(EXPR_BASIC_COLLECTION_FUNCS) / sizeof(void *), + "unexpected size"); +REG_SER_FUNC_ARRAY(OB_SFA_EXPR_COLLECTION_BASIC_PART1, + EXPR_BASIC_COLLECTION_FUNCS_PART1, + sizeof(EXPR_BASIC_COLLECTION_FUNCS_PART1) / sizeof(void *)); +REG_SER_FUNC_ARRAY(OB_SFA_EXPR_COLLECTION_BASIC_PART2, + EXPR_BASIC_COLLECTION_FUNCS_PART2, + sizeof(EXPR_BASIC_COLLECTION_FUNCS_PART2) / sizeof(void *)); + } // end namespace sql } // end namespace oceanbase diff --git a/src/share/datum/ob_datum_funcs.h b/src/share/datum/ob_datum_funcs.h index 2d43a0bc63..586fb4bf3c 100644 --- a/src/share/datum/ob_datum_funcs.h +++ b/src/share/datum/ob_datum_funcs.h @@ -41,6 +41,7 @@ public: static bool is_string_type(const ObObjType type); static bool is_json(const ObObjType type); static bool is_geometry(const ObObjType type); + static bool is_collection(const ObObjType type); static bool is_varying_len_char_type(const ObObjType type, const ObCollationType cs_type) { return (type == ObNVarchar2Type || (type == ObVarcharType && cs_type != CS_TYPE_BINARY)); } diff --git a/src/share/inner_table/ob_inner_table_schema.12451_12500.cpp b/src/share/inner_table/ob_inner_table_schema.12451_12500.cpp index 
926b750eb7..c5268020d0 100644 --- a/src/share/inner_table/ob_inner_table_schema.12451_12500.cpp +++ b/src/share/inner_table/ob_inner_table_schema.12451_12500.cpp @@ -5623,6 +5623,330 @@ int ObInnerTableSchema::all_virtual_spatial_reference_systems_schema(ObTableSche return ret; } +int ObInnerTableSchema::all_virtual_vector_index_info_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(0); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("svr_ip", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 1, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + MAX_IP_ADDR_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if 
(OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("svr_port", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 2, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("tenant_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("ls_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("rowkey_vid_table_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("vid_rowkey_table_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("inc_index_table_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + 
CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("vbitmap_table_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("snapshot_index_table_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("data_table_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("rowkey_vid_tablet_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("vid_rowkey_tablet_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); 
//is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("inc_index_tablet_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("vbitmap_tablet_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("snapshot_index_tablet_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("data_tablet_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("statistics", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + MAX_COLUMN_COMMENT_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("sync_info", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 
0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + OB_INNER_TABLE_DEFAULT_KEY_LENTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + if (OB_SUCC(ret)) { + table_schema.get_part_option().set_part_num(1); + table_schema.set_part_level(PARTITION_LEVEL_ONE); + table_schema.get_part_option().set_part_func_type(PARTITION_FUNC_TYPE_LIST_COLUMNS); + if (OB_FAIL(table_schema.get_part_option().set_part_expr("svr_ip, svr_port"))) { + LOG_WARN("set_part_expr failed", K(ret)); + } else if (OB_FAIL(table_schema.mock_list_partition_array())) { + LOG_WARN("mock list partition array failed", K(ret)); + } + } + table_schema.set_index_using_type(USING_HASH); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + } // end namespace share } // end namespace oceanbase diff --git a/src/share/inner_table/ob_inner_table_schema.15451_15500.cpp b/src/share/inner_table/ob_inner_table_schema.15451_15500.cpp index c005878085..b10a0113a6 100644 --- a/src/share/inner_table/ob_inner_table_schema.15451_15500.cpp +++ b/src/share/inner_table/ob_inner_table_schema.15451_15500.cpp @@ -1220,6 +1220,330 @@ int ObInnerTableSchema::all_virtual_spatial_reference_systems_real_agent_ora_sch return ret; } +int ObInnerTableSchema::all_virtual_vector_index_info_ora_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_ORA_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_ORA_TID); + 
table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(0); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_ORA_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCollationType::CS_TYPE_UTF8MB4_BIN); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("SVR_IP", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 1, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_UTF8MB4_BIN, //column_collation_type + MAX_IP_ADDR_LENGTH, //column_length + 2, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("SVR_PORT", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 2, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("TENANT_ID", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + 
ADD_COLUMN_SCHEMA("LS_ID", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("ROWKEY_VID_TABLE_ID", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("VID_ROWKEY_TABLE_ID", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("INC_INDEX_TABLE_ID", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("VBITMAP_TABLE_ID", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("SNAPSHOT_INDEX_TABLE_ID", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, 
//column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("DATA_TABLE_ID", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("ROWKEY_VID_TABLET_ID", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("VID_ROWKEY_TABLET_ID", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("INC_INDEX_TABLET_ID", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("VBITMAP_TABLET_ID", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("SNAPSHOT_INDEX_TABLET_ID", //column_name + ++column_id, 
//column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("DATA_TABLET_ID", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("STATISTICS", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_UTF8MB4_BIN, //column_collation_type + MAX_COLUMN_COMMENT_LENGTH, //column_length + 2, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("SYNC_INFO", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_UTF8MB4_BIN, //column_collation_type + OB_INNER_TABLE_DEFAULT_KEY_LENTH, //column_length + 2, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + if (OB_SUCC(ret)) { + table_schema.get_part_option().set_part_num(1); + table_schema.set_part_level(PARTITION_LEVEL_ONE); + table_schema.get_part_option().set_part_func_type(PARTITION_FUNC_TYPE_LIST); + if (OB_FAIL(table_schema.get_part_option().set_part_expr("SVR_IP, SVR_PORT"))) { + LOG_WARN("set_part_expr failed", K(ret)); + } else if (OB_FAIL(table_schema.mock_list_partition_array())) { + LOG_WARN("mock list partition array failed", K(ret)); + } + } + table_schema.set_index_using_type(USING_HASH); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); 
+ table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + int ObInnerTableSchema::all_virtual_temp_file_ora_schema(ObTableSchema &table_schema) { int ret = OB_SUCCESS; diff --git a/src/share/inner_table/ob_inner_table_schema.h b/src/share/inner_table/ob_inner_table_schema.h index 8570c53670..6a01bf6f93 100644 --- a/src/share/inner_table/ob_inner_table_schema.h +++ b/src/share/inner_table/ob_inner_table_schema.h @@ -1090,6 +1090,7 @@ public: static int all_virtual_nic_info_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_scheduler_job_run_detail_v2_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_spatial_reference_systems_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_vector_index_info_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_temp_file_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_sql_audit_ora_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_plan_stat_ora_schema(share::schema::ObTableSchema &table_schema); @@ -1367,6 +1368,7 @@ public: static int all_virtual_nic_info_ora_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_scheduler_job_run_detail_v2_real_agent_ora_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_spatial_reference_systems_real_agent_ora_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_vector_index_info_ora_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_temp_file_ora_schema(share::schema::ObTableSchema &table_schema); static int gv_ob_plan_cache_stat_schema(share::schema::ObTableSchema &table_schema); static int gv_ob_plan_cache_plan_stat_schema(share::schema::ObTableSchema &table_schema); @@ -3888,6 +3890,7 @@ const schema_create_func 
virtual_table_schema_creators [] = { ObInnerTableSchema::all_virtual_nic_info_schema, ObInnerTableSchema::all_virtual_scheduler_job_run_detail_v2_schema, ObInnerTableSchema::all_virtual_spatial_reference_systems_schema, + ObInnerTableSchema::all_virtual_vector_index_info_schema, ObInnerTableSchema::all_virtual_temp_file_schema, ObInnerTableSchema::all_virtual_ash_all_virtual_ash_i1_schema, ObInnerTableSchema::all_virtual_sql_plan_monitor_all_virtual_sql_plan_monitor_i1_schema, @@ -4175,6 +4178,7 @@ const schema_create_func virtual_table_schema_creators [] = { ObInnerTableSchema::all_virtual_nic_info_ora_schema, ObInnerTableSchema::all_virtual_scheduler_job_run_detail_v2_real_agent_ora_schema, ObInnerTableSchema::all_virtual_spatial_reference_systems_real_agent_ora_schema, + ObInnerTableSchema::all_virtual_vector_index_info_ora_schema, ObInnerTableSchema::all_virtual_temp_file_ora_schema, ObInnerTableSchema::all_virtual_table_real_agent_ora_idx_data_table_id_real_agent_schema, ObInnerTableSchema::all_virtual_table_real_agent_ora_idx_db_tb_name_real_agent_schema, @@ -5833,6 +5837,7 @@ const uint64_t tenant_space_tables [] = { OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_TID, OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_DETAIL_TID, OB_ALL_VIRTUAL_NIC_INFO_TID, + OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_TID, OB_ALL_VIRTUAL_TEMP_FILE_TID, OB_ALL_VIRTUAL_SQL_AUDIT_ORA_TID, OB_ALL_VIRTUAL_SQL_AUDIT_ORA_ALL_VIRTUAL_SQL_AUDIT_I1_TID, @@ -6119,6 +6124,7 @@ const uint64_t tenant_space_tables [] = { OB_ALL_VIRTUAL_NIC_INFO_ORA_TID, OB_ALL_VIRTUAL_SCHEDULER_JOB_RUN_DETAIL_V2_REAL_AGENT_ORA_TID, OB_ALL_VIRTUAL_SPATIAL_REFERENCE_SYSTEMS_REAL_AGENT_ORA_TID, + OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_ORA_TID, OB_ALL_VIRTUAL_TEMP_FILE_ORA_TID, OB_GV_OB_PLAN_CACHE_STAT_TID, OB_GV_OB_PLAN_CACHE_PLAN_STAT_TID, @@ -7814,6 +7820,7 @@ const uint64_t all_ora_mapping_virtual_table_org_tables [] = { OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_TID, OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_DETAIL_TID, OB_ALL_VIRTUAL_NIC_INFO_TID, + 
OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_TID, OB_ALL_VIRTUAL_TEMP_FILE_TID, }; const uint64_t all_ora_mapping_virtual_tables [] = { OB_ALL_VIRTUAL_SQL_AUDIT_ORA_TID @@ -7964,6 +7971,7 @@ const uint64_t all_ora_mapping_virtual_tables [] = { OB_ALL_VIRTUAL_SQL_AUDIT_O , OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_ORA_TID , OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_DETAIL_ORA_TID , OB_ALL_VIRTUAL_NIC_INFO_ORA_TID +, OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_ORA_TID , OB_ALL_VIRTUAL_TEMP_FILE_ORA_TID , }; @@ -8494,6 +8502,7 @@ const char* const tenant_space_table_names [] = { OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_TNAME, OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_DETAIL_TNAME, OB_ALL_VIRTUAL_NIC_INFO_TNAME, + OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_TNAME, OB_ALL_VIRTUAL_TEMP_FILE_TNAME, OB_ALL_VIRTUAL_SQL_AUDIT_ORA_TNAME, OB_ALL_VIRTUAL_SQL_AUDIT_ORA_ALL_VIRTUAL_SQL_AUDIT_I1_TNAME, @@ -8780,6 +8789,7 @@ const char* const tenant_space_table_names [] = { OB_ALL_VIRTUAL_NIC_INFO_ORA_TNAME, OB_ALL_VIRTUAL_SCHEDULER_JOB_RUN_DETAIL_V2_REAL_AGENT_ORA_TNAME, OB_ALL_VIRTUAL_SPATIAL_REFERENCE_SYSTEMS_REAL_AGENT_ORA_TNAME, + OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_ORA_TNAME, OB_ALL_VIRTUAL_TEMP_FILE_ORA_TNAME, OB_GV_OB_PLAN_CACHE_STAT_TNAME, OB_GV_OB_PLAN_CACHE_PLAN_STAT_TNAME, @@ -10483,6 +10493,7 @@ const uint64_t tenant_distributed_vtables [] = { OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_TID, OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_DETAIL_TID, OB_ALL_VIRTUAL_NIC_INFO_TID, + OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_TID, OB_ALL_VIRTUAL_TEMP_FILE_TID, OB_ALL_VIRTUAL_SQL_AUDIT_ORA_TID, OB_ALL_VIRTUAL_SQL_AUDIT_ORA_ALL_VIRTUAL_SQL_AUDIT_I1_TID, @@ -10559,6 +10570,7 @@ const uint64_t tenant_distributed_vtables [] = { OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_ORA_TID, OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_DETAIL_ORA_TID, OB_ALL_VIRTUAL_NIC_INFO_ORA_TID, + OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_ORA_TID, OB_ALL_VIRTUAL_TEMP_FILE_ORA_TID, }; const uint64_t restrict_access_virtual_tables[] = { @@ -10693,7 +10705,8 @@ const uint64_t restrict_access_virtual_tables[] 
= { OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_ORA_TID, OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_DETAIL_ORA_TID, OB_ALL_VIRTUAL_NIC_INFO_ORA_TID, - OB_ALL_VIRTUAL_SPATIAL_REFERENCE_SYSTEMS_REAL_AGENT_ORA_TID }; + OB_ALL_VIRTUAL_SPATIAL_REFERENCE_SYSTEMS_REAL_AGENT_ORA_TID, + OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_ORA_TID }; static inline bool is_restrict_access_virtual_table(const uint64_t tid) @@ -13302,11 +13315,11 @@ static inline int get_sys_table_lob_aux_schema(const uint64_t tid, const int64_t OB_CORE_TABLE_COUNT = 4; const int64_t OB_SYS_TABLE_COUNT = 300; -const int64_t OB_VIRTUAL_TABLE_COUNT = 831; +const int64_t OB_VIRTUAL_TABLE_COUNT = 833; const int64_t OB_SYS_VIEW_COUNT = 930; -const int64_t OB_SYS_TENANT_TABLE_COUNT = 2066; +const int64_t OB_SYS_TENANT_TABLE_COUNT = 2068; const int64_t OB_CORE_SCHEMA_VERSION = 1; -const int64_t OB_BOOTSTRAP_SCHEMA_VERSION = 2069; +const int64_t OB_BOOTSTRAP_SCHEMA_VERSION = 2071; } // end namespace share } // end namespace oceanbase diff --git a/src/share/inner_table/ob_inner_table_schema_constants.h b/src/share/inner_table/ob_inner_table_schema_constants.h index f6b56de057..c58cc37e0d 100644 --- a/src/share/inner_table/ob_inner_table_schema_constants.h +++ b/src/share/inner_table/ob_inner_table_schema_constants.h @@ -790,6 +790,7 @@ const uint64_t OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_DETAIL_TID = 12482; // "__al const uint64_t OB_ALL_VIRTUAL_NIC_INFO_TID = 12487; // "__all_virtual_nic_info" const uint64_t OB_ALL_VIRTUAL_SCHEDULER_JOB_RUN_DETAIL_V2_TID = 12488; // "__all_virtual_scheduler_job_run_detail_v2" const uint64_t OB_ALL_VIRTUAL_SPATIAL_REFERENCE_SYSTEMS_TID = 12490; // "__all_virtual_spatial_reference_systems" +const uint64_t OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_TID = 12496; // "__all_virtual_vector_index_info" const uint64_t OB_ALL_VIRTUAL_TEMP_FILE_TID = 12505; // "__all_virtual_temp_file" const uint64_t OB_ALL_VIRTUAL_SQL_AUDIT_ORA_TID = 15009; // "ALL_VIRTUAL_SQL_AUDIT_ORA" const uint64_t OB_ALL_VIRTUAL_PLAN_STAT_ORA_TID = 
15010; // "ALL_VIRTUAL_PLAN_STAT_ORA" @@ -1067,6 +1068,7 @@ const uint64_t OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_DETAIL_ORA_TID = 15451; // " const uint64_t OB_ALL_VIRTUAL_NIC_INFO_ORA_TID = 15456; // "ALL_VIRTUAL_NIC_INFO_ORA" const uint64_t OB_ALL_VIRTUAL_SCHEDULER_JOB_RUN_DETAIL_V2_REAL_AGENT_ORA_TID = 15458; // "ALL_VIRTUAL_SCHEDULER_JOB_RUN_DETAIL_V2_REAL_AGENT_ORA" const uint64_t OB_ALL_VIRTUAL_SPATIAL_REFERENCE_SYSTEMS_REAL_AGENT_ORA_TID = 15459; // "ALL_VIRTUAL_SPATIAL_REFERENCE_SYSTEMS_REAL_AGENT_ORA" +const uint64_t OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_ORA_TID = 15467; // "ALL_VIRTUAL_VECTOR_INDEX_INFO_ORA" const uint64_t OB_ALL_VIRTUAL_TEMP_FILE_ORA_TID = 15485; // "ALL_VIRTUAL_TEMP_FILE_ORA" const uint64_t OB_GV_OB_PLAN_CACHE_STAT_TID = 20001; // "GV$OB_PLAN_CACHE_STAT" const uint64_t OB_GV_OB_PLAN_CACHE_PLAN_STAT_TID = 20002; // "GV$OB_PLAN_CACHE_PLAN_STAT" @@ -3572,6 +3574,7 @@ const char *const OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_DETAIL_TNAME = "__all_vir const char *const OB_ALL_VIRTUAL_NIC_INFO_TNAME = "__all_virtual_nic_info"; const char *const OB_ALL_VIRTUAL_SCHEDULER_JOB_RUN_DETAIL_V2_TNAME = "__all_virtual_scheduler_job_run_detail_v2"; const char *const OB_ALL_VIRTUAL_SPATIAL_REFERENCE_SYSTEMS_TNAME = "__all_virtual_spatial_reference_systems"; +const char *const OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_TNAME = "__all_virtual_vector_index_info"; const char *const OB_ALL_VIRTUAL_TEMP_FILE_TNAME = "__all_virtual_temp_file"; const char *const OB_ALL_VIRTUAL_SQL_AUDIT_ORA_TNAME = "ALL_VIRTUAL_SQL_AUDIT"; const char *const OB_ALL_VIRTUAL_PLAN_STAT_ORA_TNAME = "ALL_VIRTUAL_PLAN_STAT"; @@ -3849,6 +3852,7 @@ const char *const OB_ALL_VIRTUAL_TENANT_RESOURCE_LIMIT_DETAIL_ORA_TNAME = "ALL_V const char *const OB_ALL_VIRTUAL_NIC_INFO_ORA_TNAME = "ALL_VIRTUAL_NIC_INFO"; const char *const OB_ALL_VIRTUAL_SCHEDULER_JOB_RUN_DETAIL_V2_REAL_AGENT_ORA_TNAME = "ALL_VIRTUAL_SCHEDULER_JOB_RUN_DETAIL_V2_REAL_AGENT"; const char *const 
OB_ALL_VIRTUAL_SPATIAL_REFERENCE_SYSTEMS_REAL_AGENT_ORA_TNAME = "ALL_VIRTUAL_SPATIAL_REFERENCE_SYSTEMS_REAL_AGENT"; +const char *const OB_ALL_VIRTUAL_VECTOR_INDEX_INFO_ORA_TNAME = "ALL_VIRTUAL_VECTOR_INDEX_INFO"; const char *const OB_ALL_VIRTUAL_TEMP_FILE_ORA_TNAME = "ALL_VIRTUAL_TEMP_FILE"; const char *const OB_GV_OB_PLAN_CACHE_STAT_TNAME = "GV$OB_PLAN_CACHE_STAT"; const char *const OB_GV_OB_PLAN_CACHE_PLAN_STAT_TNAME = "GV$OB_PLAN_CACHE_PLAN_STAT"; diff --git a/src/share/inner_table/ob_inner_table_schema_def.py b/src/share/inner_table/ob_inner_table_schema_def.py index f001a70b3b..3c7a3c4c7f 100644 --- a/src/share/inner_table/ob_inner_table_schema_def.py +++ b/src/share/inner_table/ob_inner_table_schema_def.py @@ -14721,7 +14721,41 @@ def_table_schema(**gen_iterate_virtual_table_def( # 12493: __all_virtual_kv_group_commit_status # 12494: __all_virtual_session_sys_variable # 12495: __all_virtual_spm_evo_result -# 12496: __all_virtual_vector_index_info +def_table_schema( + owner = 'huhaosheng.hhs', + table_name = '__all_virtual_vector_index_info', + table_id = '12496', + table_type = 'VIRTUAL_TABLE', + gm_columns = [], + in_tenant_space = True, + rowkey_columns = [ + ], + + normal_columns = [ + ('svr_ip', 'varchar:MAX_IP_ADDR_LENGTH'), + ('svr_port', 'int'), + ('tenant_id', 'int'), + ('ls_id', 'int'), + ('rowkey_vid_table_id', 'int'), + ('vid_rowkey_table_id', 'int'), + ('inc_index_table_id', 'int'), + ('vbitmap_table_id', 'int'), + ('snapshot_index_table_id', 'int'), + ('data_table_id', 'int'), + ('rowkey_vid_tablet_id', 'int'), + ('vid_rowkey_tablet_id', 'int'), + ('inc_index_tablet_id', 'int'), + ('vbitmap_tablet_id', 'int'), + ('snapshot_index_tablet_id', 'int'), + ('data_tablet_id', 'int'), + # memory usage, status..., logic_version + ('statistics', 'varchar:MAX_COLUMN_COMMENT_LENGTH'), + # sync snapshot... 
+ ('sync_info', 'varchar:OB_INNER_TABLE_DEFAULT_KEY_LENTH') + ], + partition_columns = ['svr_ip', 'svr_port'], + vtable_route_policy = 'distributed', +) # 12497: __all_virtual_pkg_type # 12498: __all_virtual_pkg_type_attr @@ -15271,6 +15305,8 @@ def_table_schema(**no_direct_access(gen_oracle_mapping_real_virtual_table_def('1 # 15478: __all_pkg_coll_type # 15479: __all_pkg_coll_type # 15480: __all_virtual_kv_client_info +# +def_table_schema(**no_direct_access(gen_oracle_mapping_virtual_table_def('15467', all_def_keywords['__all_virtual_vector_index_info']))) # 15481: __all_virtual_wr_sql_plan # 15482: __all_virtual_res_mgr_sysstat # 15483: __all_virtual_wr_res_mgr_sysstat diff --git a/src/share/inner_table/sys_package/dbms_vector_body_mysql.sql b/src/share/inner_table/sys_package/dbms_vector_body_mysql.sql new file mode 100644 index 0000000000..9c210eaa96 --- /dev/null +++ b/src/share/inner_table/sys_package/dbms_vector_body_mysql.sql @@ -0,0 +1,55 @@ +CREATE OR REPLACE PACKAGE BODY dbms_vector + + -- ------------------------------------------------------------------------ + -- refresh_index + + PROCEDURE do_refresh_index( + IN idx_name VARCHAR(65535), + IN table_name VARCHAR(65535), + IN idx_vector_col VARCHAR(65535) DEFAULT NULL, + IN refresh_threshold INT DEFAULT 10000, + IN refresh_type VARCHAR(65535) DEFAULT NULL + ); + PRAGMA INTERFACE(C, DBMS_VECTOR_MYSQL_REFRESH_INDEX); + + PROCEDURE refresh_index( + IN idx_name VARCHAR(65535), + IN table_name VARCHAR(65535), + IN idx_vector_col VARCHAR(65535) DEFAULT NULL, + IN refresh_threshold INT DEFAULT 10000, + IN refresh_type VARCHAR(65535) DEFAULT NULL) + BEGIN + COMMIT; + CALL do_refresh_index(idx_name, table_name, idx_vector_col, refresh_threshold, refresh_type); + END; + + -- ------------------------------------------------------------------------ + -- rebuild_index + + PROCEDURE do_rebuild_index ( + IN idx_name VARCHAR(65535), + IN table_name VARCHAR(65535), + IN idx_vector_col VARCHAR(65535) DEFAULT NULL, + IN 
delta_rate_threshold FLOAT DEFAULT 0.2, + IN idx_organization VARCHAR(65535) DEFAULT NULL, + IN idx_distance_metrics VARCHAR(65535) DEFAULT 'EUCLIDEAN', + IN idx_parameters LONGTEXT DEFAULT NULL, + IN idx_parallel_creation INT DEFAULT 1 + ); + PRAGMA INTERFACE(C, DBMS_VECTOR_MYSQL_REBUILD_INDEX); + + PROCEDURE rebuild_index( + IN idx_name VARCHAR(65535), + IN table_name VARCHAR(65535), + IN idx_vector_col VARCHAR(65535) DEFAULT NULL, + IN delta_rate_threshold FLOAT DEFAULT 0.2, + IN idx_organization VARCHAR(65535) DEFAULT NULL, + IN idx_distance_metrics VARCHAR(65535) DEFAULT 'EUCLIDEAN', + IN idx_parameters LONGTEXT DEFAULT NULL, + IN idx_parallel_creation INT DEFAULT 1) + BEGIN + COMMIT; + CALL do_rebuild_index(idx_name, table_name, idx_vector_col, delta_rate_threshold, idx_organization, idx_distance_metrics, idx_parameters, idx_parallel_creation); + END; + +END dbms_vector; diff --git a/src/share/inner_table/sys_package/dbms_vector_mysql.sql b/src/share/inner_table/sys_package/dbms_vector_mysql.sql new file mode 100644 index 0000000000..90425da0e2 --- /dev/null +++ b/src/share/inner_table/sys_package/dbms_vector_mysql.sql @@ -0,0 +1,78 @@ +CREATE OR REPLACE PACKAGE dbms_vector AUTHID CURRENT_USER + + ------------ + -- OVERVIEW + -- + -- These routines allow the user to refresh and rebuild vector index. + + ------------------------------------------------ + -- SUMMARY OF SERVICES PROVIDED BY THIS PACKAGE + -- + -- refresh_index - refresh delta modification to vector index. + -- rebuild_index - rebuild vector index. + + ---------------------------- + -- PROCEDURES AND FUNCTIONS + -- + + -- ---------------------------------------------------------------------------- + -- Refresh delta modification to vector index for vector ann searching performance. + -- + -- + -- IDX_NAME + -- Name of the vector index. + -- TABLE_NAME + -- Name of the master(base) table. + -- IDX_VECTOR_COL + -- Name of the vector column which vector index is built on. 
+ -- REFRESH_THRESHOLD + -- If the row count of delta_buff_table is greater than REFRESH_THRESHOLD, refreshing is triggered. + -- If not, nothing will happen. + -- REFRESH_TYPE + -- Only FAST is supported now. + -- + -- EXCEPTIONS + -- + + PROCEDURE refresh_index( + IN idx_name VARCHAR(65535), + IN table_name VARCHAR(65535), + IN idx_vector_col VARCHAR(65535) DEFAULT NULL, + IN refresh_threshold INT DEFAULT 10000, + IN refresh_type VARCHAR(65535) DEFAULT NULL); + + -- ----------------------------------------------------------------------- + -- Rebuild vector index. + -- + -- IDX_NAME + -- Name of the vector index. + -- TABLE_NAME + -- Name of the master(base) table. + -- IDX_VECTOR_COL + -- Name of the vector column which vector index is built on. + -- DELTA_RATE_THRESHOLD + -- If (the row count of delta_buff_table + the row count of index_id_table) + -- / the row count of master(base) table is greater than DELTA_RATE_THRESHOLD, rebuilding is triggered. + -- If not, nothing will happen. + -- IDX_ORGANIZATION + -- Vector ann searching algorithm + -- NEIGHBOR PARTITION: ivfflat/ivfpq/... + -- IN MEMORY NEIGHBOR GRAPH: hnsw... + -- IDX_PARAMETERS + -- The parameters of vector ann searching algorithm. + -- IDX_PARALLEL_CREATION + -- The degree of parallelism for building vector index.
+ -- EXCEPTIONS + -- + + PROCEDURE rebuild_index( + IN idx_name VARCHAR(65535), + IN table_name VARCHAR(65535), + IN idx_vector_col VARCHAR(65535) DEFAULT NULL, + IN delta_rate_threshold FLOAT DEFAULT 0.2, + IN idx_organization VARCHAR(65535) DEFAULT NULL, + IN idx_distance_metrics VARCHAR(65535) DEFAULT 'EUCLIDEAN', + IN idx_parameters LONGTEXT DEFAULT NULL, + IN idx_parallel_creation INT DEFAULT 1); + +END dbms_vector; diff --git a/src/share/inner_table/table_id_to_name b/src/share/inner_table/table_id_to_name index af88010caf..dc78652b98 100644 --- a/src/share/inner_table/table_id_to_name +++ b/src/share/inner_table/table_id_to_name @@ -1137,6 +1137,7 @@ # 12488: __all_scheduler_job_run_detail_v2 # BASE_TABLE_NAME # 12490: __all_virtual_spatial_reference_systems # 12490: __all_spatial_reference_systems # BASE_TABLE_NAME +# 12496: __all_virtual_vector_index_info # 12505: __all_virtual_temp_file # 15009: ALL_VIRTUAL_SQL_AUDIT # 15009: __all_virtual_sql_audit # BASE_TABLE_NAME @@ -1749,6 +1750,8 @@ # 15458: __all_scheduler_job_run_detail_v2 # BASE_TABLE_NAME # 15459: ALL_VIRTUAL_SPATIAL_REFERENCE_SYSTEMS_REAL_AGENT # 15459: __all_spatial_reference_systems # BASE_TABLE_NAME +# 15467: ALL_VIRTUAL_VECTOR_INDEX_INFO +# 15467: __all_virtual_vector_index_info # BASE_TABLE_NAME # 15485: ALL_VIRTUAL_TEMP_FILE # 15485: __all_virtual_temp_file # BASE_TABLE_NAME # 20001: GV$OB_PLAN_CACHE_STAT diff --git a/src/share/ob_common_rpc_proxy.h b/src/share/ob_common_rpc_proxy.h index 35f0ada414..46c0edf879 100644 --- a/src/share/ob_common_rpc_proxy.h +++ b/src/share/ob_common_rpc_proxy.h @@ -76,7 +76,7 @@ public: RPC_S(PRD rename_table, obrpc::OB_RENAME_TABLE, (ObRenameTableArg)); RPC_S(PRD truncate_table, obrpc::OB_TRUNCATE_TABLE, (ObTruncateTableArg), ObDDLRes); RPC_S(PRD truncate_table_v2, obrpc::OB_TRUNCATE_TABLE_V2, (ObTruncateTableArg), ObDDLRes); - RPC_S(PRD generate_aux_index_schema, obrpc::OB_GENERATE_AUX_INDEX_SCHEMA, (obrpc::ObGenerateAuxIndexSchemaArg), 
obrpc::ObGenerateAuxIndexSchemaRes); + RPC_S(PRD create_aux_index, obrpc::OB_CREATE_AUX_INDEX, (obrpc::ObCreateAuxIndexArg), obrpc::ObCreateAuxIndexRes); RPC_S(PRD create_index, obrpc::OB_CREATE_INDEX, (ObCreateIndexArg), ObAlterTableRes); RPC_S(PRD drop_index, obrpc::OB_DROP_INDEX, (ObDropIndexArg), ObDropIndexRes); RPC_S(PRD rebuild_vec_index, obrpc::OB_REBUILD_VEC_INDEX, (ObRebuildIndexArg), ObAlterTableRes); diff --git a/src/share/ob_ddl_common.cpp b/src/share/ob_ddl_common.cpp index ad5e8e93bf..5cd6e01eb8 100644 --- a/src/share/ob_ddl_common.cpp +++ b/src/share/ob_ddl_common.cpp @@ -889,7 +889,13 @@ int ObDDLUtil::generate_build_replica_sql( } } } - if (OB_SUCC(ret) && source_table_schema->is_heap_table() && dest_table_schema->is_index_local_storage()) { + bool need_add_partition_key = source_table_schema->is_heap_table() && dest_table_schema->is_index_local_storage(); + bool is_partitioned_vec_idx_table = dest_table_schema->is_partitioned_table() && dest_table_schema->is_index_local_storage() && + (dest_table_schema->is_vec_delta_buffer_type() || + dest_table_schema->is_vec_index_id_type() || + dest_table_schema->is_vec_index_snapshot_data_type()); + need_add_partition_key = is_partitioned_vec_idx_table || need_add_partition_key; + if (OB_SUCC(ret) && need_add_partition_key) { ObArray src_column_ids; ObSEArray extra_column_ids; if (OB_FAIL(source_table_schema->get_column_ids(src_column_ids))) { @@ -1057,6 +1063,9 @@ int ObDDLUtil::generate_build_replica_sql( LOG_WARN("failed to generated ddl schema hint", K(ret)); } } + if (dest_table_schema->is_vec_vid_rowkey_type()) { + src_table_schema_version_hint_sql_string.reset(); + } if (OB_FAIL(ret)) { } else if (oracle_mode) { if (OB_FAIL(sql_string.assign_fmt("INSERT /*+ monitor enable_parallel_dml parallel(%ld) opt_param('ddl_execution_id', %ld) opt_param('ddl_task_id', %ld) opt_param('enable_newsort', 'false') use_px */INTO \"%.*s\".\"%.*s\" %.*s(%.*s) SELECT /*+ index(\"%.*s\" primary) %.*s */ %.*s from 
\"%.*s\".\"%.*s\" %.*s as of scn %ld %.*s", @@ -1319,6 +1328,360 @@ int ObDDLUtil::find_table_scan_table_id(const ObOpSpec *spec, uint64_t &table_id return ret; } +int ObDDLUtil::obtain_snapshot( + const share::ObDDLTaskStatus next_task_status, + const uint64_t table_id, + const uint64_t target_table_id, + int64_t &snapshot_version, + bool &snapshot_held, + rootserver::ObDDLTask* task) +{ + int ret = OB_SUCCESS; + rootserver::ObDDLWaitTransEndCtx* wait_trans_ctx = nullptr; + rootserver::ObRootService *root_service = GCTX.root_service_; + if (OB_ISNULL(root_service)) { + ret = OB_ERR_SYS; + LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_ISNULL(task)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("invalid argument", K(ret)); + } else if (OB_ISNULL(wait_trans_ctx = task->get_wait_trans_ctx())) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("wait trans ctx is null", K(ret)); + } else if (!task->is_inited()) { + ret = OB_NOT_INIT; + LOG_WARN("args have not been inited", K(ret), K(wait_trans_ctx->is_inited()), K(task->is_inited()), K(task->get_task_type())); + } else { + ObDDLTaskStatus new_status = ObDDLTaskStatus::OBTAIN_SNAPSHOT; + uint64_t tenant_id = task->get_src_tenant_id(); + if (!wait_trans_ctx->is_inited()) { + if (OB_FAIL(wait_trans_ctx->init(tenant_id, task->get_task_id(), task->get_object_id(), rootserver::ObDDLWaitTransEndCtx::WAIT_SCHEMA_TRANS, task->get_src_schema_version()))) { + LOG_WARN("fail to init wait trans ctx", K(ret)); + } + } else { + // to get snapshot version. 
+ if (OB_SUCC(ret) && snapshot_version <= 0) { + bool is_trans_end = false; + const bool need_wait_trans_end = false; + if (OB_FAIL(wait_trans_ctx->try_wait(is_trans_end, snapshot_version, need_wait_trans_end))) { + LOG_WARN("just to get snapshot rather than wait trans end", K(ret)); + } + } + DEBUG_SYNC(DDL_REDEFINITION_HOLD_SNAPSHOT); + // try hold snapshot + if (OB_FAIL(ret)) { + } else if (snapshot_version <= 0) { + // the snapshot version obtained here must be valid. + ret = OB_ERR_UNEXPECTED; + LOG_WARN("snapshot version is invalid", K(ret), KPC(wait_trans_ctx)); + } else if (snapshot_version > 0 && !snapshot_held) { + if (OB_FAIL(rootserver::ObDDLTaskRecordOperator::update_snapshot_version(root_service->get_sql_proxy(), + tenant_id, + task->get_task_id(), + snapshot_version))) { + LOG_WARN("update snapshot version failed", K(ret), K(task->get_task_id()), K(tenant_id)); + } else if (OB_FAIL(hold_snapshot(task, table_id, target_table_id, root_service, snapshot_version))) { + if (OB_SNAPSHOT_DISCARDED == ret) { + snapshot_version = 0; + snapshot_held = false; + wait_trans_ctx->reset(); + } else { + LOG_WARN("hold snapshot version failed", K(ret)); + } + } else { + snapshot_held = true; + } + } + + if (OB_FAIL(ret)) { + if (OB_SNAPSHOT_DISCARDED == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("fail to obtain snapshot version", K(ret)); + } + } else { + new_status = next_task_status; + } + } + if (new_status == next_task_status || OB_FAIL(ret)) { + if (OB_FAIL(task->switch_status(new_status, true, ret))) { + LOG_WARN("fail to switch task status", K(ret)); + } + } + task->add_event_info("obtain snapshot finish"); + LOG_INFO("obtain snapshot", K(ret), K(task->get_snapshot_version()), K(table_id), K(target_table_id), K(task->get_src_schema_version()), "ddl_event_info", ObDDLEventInfo()); + } + return ret; +} + +int ObDDLUtil::hold_snapshot( + rootserver::ObDDLTask* task, + const uint64_t table_id, + const uint64_t target_table_id, + rootserver::ObRootService 
*root_service, + const int64_t snapshot_version) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(task) || OB_ISNULL(root_service)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("invalid argument", K(ret), KP(task), KP(root_service)); + } else if (!task->is_inited()) { + ret = OB_NOT_INIT; + LOG_WARN("args have not been inited", K(ret), K(task->get_task_type())); + } else { + ObSEArray tablet_ids; + SCN snapshot_scn; + ObSchemaGetterGuard schema_guard; + const ObTableSchema *data_table_schema = nullptr; + const ObTableSchema *dest_table_schema = nullptr; + uint64_t tenant_id = task->get_src_tenant_id(); + int64_t schema_version = task->get_src_schema_version(); + ObMultiVersionSchemaService &schema_service = ObMultiVersionSchemaService::get_instance(); + if (OB_UNLIKELY(snapshot_version < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(snapshot_version)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task->get_task_id(), DDL_TASK_HOLD_SNAPSHOT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task->get_task_id())); + } else if (OB_FAIL(snapshot_scn.convert_for_tx(snapshot_version))) { + LOG_WARN("failed to convert", K(snapshot_version), K(ret)); + } else if (OB_FAIL(schema_service.get_tenant_schema_guard(tenant_id, schema_guard))) { + LOG_WARN("get tenant schema guard failed", K(ret)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, table_id, data_table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(table_id)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, target_table_id, dest_table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(target_table_id)); + } else if (OB_ISNULL(data_table_schema) || OB_ISNULL(dest_table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("table not exist", K(ret), K(table_id), K(target_table_id), KP(data_table_schema), KP(dest_table_schema)); + } else if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id, table_id, tablet_ids))) { + LOG_WARN("failed to get data table 
snapshot", K(ret), K(table_id)); + } else if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id, target_table_id, tablet_ids))) { + LOG_WARN("failed to get dest table snapshot", K(ret), K(target_table_id)); + } else if (data_table_schema->get_aux_lob_meta_tid() != OB_INVALID_ID && + OB_FAIL(ObDDLUtil::get_tablets(tenant_id, data_table_schema->get_aux_lob_meta_tid(), tablet_ids))) { + LOG_WARN("failed to get data lob meta table snapshot", K(ret)); + } else if (data_table_schema->get_aux_lob_piece_tid() != OB_INVALID_ID && + OB_FAIL(ObDDLUtil::get_tablets(tenant_id, data_table_schema->get_aux_lob_piece_tid(), tablet_ids))) { + LOG_WARN("failed to get data lob piece table snapshot", K(ret)); + } else if (dest_table_schema->get_aux_lob_meta_tid() != OB_INVALID_ID && + OB_FAIL(ObDDLUtil::get_tablets(tenant_id, dest_table_schema->get_aux_lob_meta_tid(), tablet_ids))) { + LOG_WARN("failed to get dest lob meta table snapshot", K(ret)); + } else if (dest_table_schema->get_aux_lob_piece_tid() != OB_INVALID_ID && + OB_FAIL(ObDDLUtil::get_tablets(tenant_id, dest_table_schema->get_aux_lob_piece_tid(), tablet_ids))) { + LOG_WARN("failed to get dest lob piece table snapshot", K(ret)); + } else { + rootserver::ObDDLService &ddl_service = root_service->get_ddl_service(); + if (OB_FAIL(ddl_service.get_snapshot_mgr().batch_acquire_snapshot( + ddl_service.get_sql_proxy(), SNAPSHOT_FOR_DDL, tenant_id, schema_version, snapshot_scn, nullptr, tablet_ids))) { + LOG_WARN("batch acquire snapshot failed", K(ret), K(tablet_ids)); + } + } + task->add_event_info("hold snapshot finish"); + LOG_INFO("hold snapshot finished", K(ret), K(task->get_snapshot_version()), K(table_id), K(target_table_id), K(schema_version), "ddl_event_info", ObDDLEventInfo()); + } + return ret; +} + +int ObDDLUtil::release_snapshot( + rootserver::ObDDLTask* task, + const uint64_t table_id, + const uint64_t target_table_id, + const int64_t snapshot_version) +{ + int ret = OB_SUCCESS; + rootserver::ObRootService *root_service = 
GCTX.root_service_; + ObSEArray tablet_ids; + ObSchemaGetterGuard schema_guard; + const ObTableSchema *data_table_schema = nullptr; + const ObTableSchema *dest_table_schema = nullptr; + ObMultiVersionSchemaService &schema_service = ObMultiVersionSchemaService::get_instance(); + if (OB_ISNULL(root_service)) { + ret = OB_ERR_SYS; + LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_ISNULL(task)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("invalid argument", K(ret)); + } else if (!task->is_inited()) { + ret = OB_NOT_INIT; + LOG_WARN("args have not been inited", K(ret), K(task->get_task_type())); + } else { + uint64_t tenant_id = task->get_src_tenant_id(); + int64_t schema_version = task->get_src_schema_version(); + if (OB_FAIL(DDL_SIM(tenant_id, task->get_task_id(), DDL_TASK_RELEASE_SNAPSHOT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task->get_task_id())); + } else if (OB_FAIL(schema_service.get_tenant_schema_guard(tenant_id, schema_guard))) { + LOG_WARN("get tenant schema guard failed", K(ret)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, table_id, data_table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(table_id)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, target_table_id, dest_table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(target_table_id)); + } else if (OB_ISNULL(data_table_schema) || OB_ISNULL(dest_table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("table not exist", K(ret), K(table_id), K(target_table_id), KP(data_table_schema), KP(dest_table_schema)); + } else if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id, table_id, tablet_ids))) { + LOG_WARN("failed to get data table snapshot", K(ret)); + } else if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id, target_table_id, tablet_ids))) { + LOG_WARN("failed to get dest table snapshot", K(ret)); + } else if (data_table_schema->get_aux_lob_meta_tid() != OB_INVALID_ID && + 
OB_FAIL(ObDDLUtil::get_tablets(tenant_id, data_table_schema->get_aux_lob_meta_tid(), tablet_ids))) { + LOG_WARN("failed to get data lob meta table snapshot", K(ret)); + } else if (data_table_schema->get_aux_lob_piece_tid() != OB_INVALID_ID && + OB_FAIL(ObDDLUtil::get_tablets(tenant_id, data_table_schema->get_aux_lob_piece_tid(), tablet_ids))) { + LOG_WARN("failed to get data lob piece table snapshot", K(ret)); + } else if (dest_table_schema->get_aux_lob_meta_tid() != OB_INVALID_ID && + OB_FAIL(ObDDLUtil::get_tablets(tenant_id, dest_table_schema->get_aux_lob_meta_tid(), tablet_ids))) { + LOG_WARN("failed to get dest lob meta table snapshot", K(ret)); + } else if (dest_table_schema->get_aux_lob_piece_tid() != OB_INVALID_ID && + OB_FAIL(ObDDLUtil::get_tablets(tenant_id, dest_table_schema->get_aux_lob_piece_tid(), tablet_ids))) { + LOG_WARN("failed to get dest lob piece table snapshot", K(ret)); + } else if (OB_FAIL(task->batch_release_snapshot(snapshot_version, tablet_ids))) { + LOG_WARN("failed to release snapshot", K(ret)); + } + task->add_event_info("release snapshot finish"); + LOG_INFO("release snapshot finished", K(ret), K(snapshot_version), K(table_id), K(target_table_id), K(schema_version), "ddl_event_info", ObDDLEventInfo()); + } + return ret; +} + +int ObDDLUtil::check_and_cancel_single_replica_dag( + rootserver::ObDDLTask* task, + const uint64_t table_id, + const uint64_t target_table_id, + common::hash::ObHashMap& check_dag_exit_tablets_map, + int64_t &check_dag_exit_retry_cnt, + bool is_complement_data_dag, + bool &all_dag_exit) +{ + int ret = OB_SUCCESS; + all_dag_exit = false; + const bool force_renew = true; + bool is_cache_hit = false; + const int64_t expire_renew_time = force_renew ? 
INT64_MAX : 0; + share::ObLocationService *location_service = GCTX.location_service_; + rootserver::ObRootService *root_service = GCTX.root_service_; + if (OB_ISNULL(task)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("invalid argument", K(ret)); + } else if (OB_UNLIKELY(!task->is_inited())) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(location_service) || OB_ISNULL(root_service)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret), KP(location_service), KP(root_service)); + } else if (OB_UNLIKELY(!check_dag_exit_tablets_map.created())) { + const int64_t CHECK_DAG_EXIT_BUCKET_NUM = 64; + common::ObArray src_tablet_ids; + common::ObArray dst_tablet_ids; + uint64_t tenant_id = task->get_src_tenant_id(); + uint64_t dst_tenant_id = task->get_tenant_id(); + if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id, table_id, src_tablet_ids))) { + LOG_WARN("fail to get tablets", K(ret), K(tenant_id), K(table_id)); + } else if (OB_FAIL(ObDDLUtil::get_tablets(dst_tenant_id, target_table_id, dst_tablet_ids))) { + LOG_WARN("fail to get tablets", K(ret), K(dst_tenant_id), K(target_table_id)); + } else if (OB_FAIL(check_dag_exit_tablets_map.create(CHECK_DAG_EXIT_BUCKET_NUM, lib::ObLabel("DDLChkDagMap")))) { + LOG_WARN("create hashset set failed", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < src_tablet_ids.count(); i++) { + if (OB_FAIL(check_dag_exit_tablets_map.set_refactored(src_tablet_ids.at(i), dst_tablet_ids.at(i)))) { + LOG_WARN("set refactored failed", K(ret)); + } + } + } + } + if (OB_SUCC(ret)) { + int saved_ret = OB_SUCCESS; + ObAddr unused_leader_addr; + const int64_t timeout_us = ObDDLUtil::get_default_ddl_rpc_timeout(); + common::hash::ObHashMap ::const_iterator iter = + check_dag_exit_tablets_map.begin(); + ObArray dag_not_exist_tablets; + uint64_t tenant_id = task->get_src_tenant_id(); + uint64_t dst_tenant_id = task->get_tenant_id(); + for (; OB_SUCC(ret) && iter != check_dag_exit_tablets_map.end(); iter++) { + 
ObLSID src_ls_id; + ObLSID dst_ls_id; + const common::ObTabletID &src_tablet_id = iter->first; + const common::ObTabletID &dst_tablet_id = iter->second; + int64_t paxos_member_count = 0; + common::ObArray paxos_server_list; + if (OB_FAIL(ObDDLUtil::get_tablet_leader_addr(location_service, tenant_id, src_tablet_id, timeout_us, src_ls_id, unused_leader_addr))) { + LOG_WARN("get src tablet leader addr failed", K(ret)); + } else if (OB_FAIL(ObDDLUtil::get_tablet_leader_addr(location_service, dst_tenant_id, dst_tablet_id, timeout_us, dst_ls_id, unused_leader_addr))) { + LOG_WARN("get dst tablet leader addr failed", K(ret)); + } else if (OB_FAIL(ObDDLUtil::get_tablet_paxos_member_list(dst_tenant_id, dst_tablet_id, paxos_server_list, paxos_member_count))) { + LOG_WARN("get tablet paxos member list failed", K(ret)); + } else { + bool is_tablet_dag_exist = false; + obrpc::ObDDLBuildSingleReplicaRequestArg arg; + arg.ls_id_ = src_ls_id; + arg.dest_ls_id_ = dst_ls_id; + arg.tenant_id_ = tenant_id; + arg.dest_tenant_id_ = dst_tenant_id; + arg.source_tablet_id_ = src_tablet_id; + arg.dest_tablet_id_ = dst_tablet_id; + arg.source_table_id_ = table_id; + arg.dest_schema_id_ = target_table_id; + arg.schema_version_ = task->get_src_schema_version(); + arg.dest_schema_version_ = task->get_schema_version(); + arg.snapshot_version_ = 1; // to ensure arg valid only. + arg.ddl_type_ = task->get_task_type(); + arg.task_id_ = task->get_task_id(); + arg.parallelism_ = 1; // to ensure arg valid only. + arg.execution_id_ = 1; // to ensure arg valid only. + arg.data_format_version_ = 1; // to ensure arg valid only. + arg.tablet_task_id_ = 1; // to ensure arg valid only. + arg.consumer_group_id_ = 0; // to ensure arg valid only. 
+ for (int64_t j = 0; OB_SUCC(ret) && j < paxos_server_list.count(); j++) { + int tmp_ret = OB_SUCCESS; + obrpc::Bool is_replica_dag_exist(true); + if (is_complement_data_dag && OB_TMP_FAIL(root_service->get_rpc_proxy().to(paxos_server_list.at(j)) + .by(dst_tenant_id).timeout(timeout_us).check_and_cancel_ddl_complement_dag(arg, is_replica_dag_exist))) { + // rpc failed: conservatively treat the dag as still existing on this server. + saved_ret = OB_SUCC(saved_ret) ? tmp_ret : saved_ret; + is_tablet_dag_exist = true; + LOG_WARN("check and cancel ddl complement dag failed", K(ret), K(tmp_ret), K(arg)); + } else if (!is_complement_data_dag && OB_TMP_FAIL(root_service->get_rpc_proxy().to(paxos_server_list.at(j)) + .by(dst_tenant_id).timeout(timeout_us).check_and_cancel_delete_lob_meta_row_dag(arg, is_replica_dag_exist))) { + // rpc failed: conservatively treat the dag as still existing on this server. + saved_ret = OB_SUCC(saved_ret) ? tmp_ret : saved_ret; + is_tablet_dag_exist = true; + LOG_WARN("check and cancel delete lob meta row dag failed", K(ret), K(tmp_ret), K(arg)); + } else if (is_replica_dag_exist) { + is_tablet_dag_exist = true; + if (REACH_COUNT_INTERVAL(1000L)) { + LOG_INFO("wait dag exist", "addr", paxos_server_list.at(j), K(arg)); + } + } + } + if (OB_SUCC(ret) && !is_tablet_dag_exist) { + if (OB_FAIL(dag_not_exist_tablets.push_back(src_tablet_id))) { + LOG_WARN("push back failed", K(ret)); + } + } + } + } + if (OB_SUCC(ret)) { + for (int64_t j = 0; OB_SUCC(ret) && j < dag_not_exist_tablets.count(); j++) { + if (OB_FAIL(check_dag_exit_tablets_map.erase_refactored(dag_not_exist_tablets.at(j)))) { + LOG_WARN("erase failed", K(ret)); + } + } + ret = OB_SUCC(ret) ? saved_ret : ret; + } + } + if (OB_SUCC(ret)) { + all_dag_exit = check_dag_exit_tablets_map.empty(); + task->set_delay_schedule_time(3000L * 1000L); // 3s, to avoid sending too many rpcs to the same replica frequently if retry.
+ } else if (OB_TABLE_NOT_EXIST == ret + || OB_TENANT_HAS_BEEN_DROPPED == ret + || OB_TENANT_NOT_EXIST == ret + || (++check_dag_exit_retry_cnt >= 10 /*MAX RETRY COUNT IF FAILED*/)) { + ret = OB_SUCCESS; + all_dag_exit = true; + } + return ret; +} + int ObDDLUtil::ddl_get_tablet( ObLSHandle &ls_handle, const ObTabletID &tablet_id, @@ -2027,6 +2390,7 @@ int ObDDLUtil::replace_user_tenant_id(const uint64_t tenant_id, ArgType &ddl_arg REPLACE_DDL_ARG_FUNC(obrpc::ObDropDatabaseArg) REPLACE_DDL_ARG_FUNC(obrpc::ObDropTableArg) REPLACE_DDL_ARG_FUNC(obrpc::ObDropIndexArg) +REPLACE_DDL_ARG_FUNC(obrpc::ObRebuildIndexArg) REPLACE_DDL_ARG_FUNC(obrpc::ObTruncateTableArg) #undef REPLACE_DDL_ARG_FUNC diff --git a/src/share/ob_ddl_common.h b/src/share/ob_ddl_common.h index 4ded62933a..c64b084bf2 100644 --- a/src/share/ob_ddl_common.h +++ b/src/share/ob_ddl_common.h @@ -29,6 +29,7 @@ struct ObAlterTableArg; struct ObDropDatabaseArg; struct ObDropTableArg; struct ObDropIndexArg; +struct ObRebuildIndexArg; struct ObTruncateTableArg; struct ObCreateIndexArg; struct ObIndexArg; @@ -45,6 +46,11 @@ class ObLSHandle; struct ObStorageColumnGroupSchema; class ObCOSSTableV2; } +namespace rootserver +{ +class ObDDLTask; +class ObDDLWaitTransEndCtx; +} namespace share { class ObLocationService; @@ -140,7 +146,8 @@ enum ObDDLTaskType MODIFY_NOT_NULL_COLUMN_STATE_TASK = 11, MAKE_RECOVER_RESTORE_TABLE_TASK_TAKE_EFFECT = 12, PARTITION_SPLIT_RECOVERY_TASK = 13, - PARTITION_SPLIT_RECOVERY_CLEANUP_GARBAGE_TASK = 14 + PARTITION_SPLIT_RECOVERY_CLEANUP_GARBAGE_TASK = 14, + SWITCH_VEC_INDEX_NAME_TASK = 15, }; enum ObDDLTaskStatus { @@ -379,6 +386,11 @@ static inline bool is_complement_data_relying_on_dag(const ObDDLType type) || DDL_TABLE_RESTORE == type; } +static inline bool is_delete_lob_meta_row_relying_on_dag(const ObDDLType type) +{ + return DDL_DROP_VEC_INDEX == type; +} + static inline bool is_invalid_ddl_type(const ObDDLType type) { return DDL_INVALID == type; @@ -686,6 +698,7 @@ public: static 
int replace_user_tenant_id(const uint64_t tenant_id, obrpc::ObDropDatabaseArg &drop_db_arg); static int replace_user_tenant_id(const uint64_t tenant_id, obrpc::ObDropTableArg &drop_table_arg); static int replace_user_tenant_id(const uint64_t tenant_id, obrpc::ObDropIndexArg &drop_index_arg); + static int replace_user_tenant_id(const uint64_t tenant_id, obrpc::ObRebuildIndexArg &rebuild_index_arg); static int replace_user_tenant_id(const uint64_t tenant_id, obrpc::ObTruncateTableArg &trucnate_table_arg); static int replace_user_tenant_id(const uint64_t tenant_id, obrpc::ObCreateIndexArg &create_index_arg); @@ -781,6 +794,7 @@ public: case DDL_CREATE_INDEX: case DDL_CREATE_MLOG: case DDL_CREATE_FTS_INDEX: + case DDL_CREATE_VEC_INDEX: case DDL_CREATE_PARTITIONED_LOCAL_INDEX: case DDL_AUTO_SPLIT_BY_RANGE: case DDL_AUTO_SPLIT_NON_RANGE: @@ -798,8 +812,33 @@ public: } static bool use_idempotent_mode(const int64_t data_format_version, const share::ObDDLType task_type); static int64_t get_real_parallelism(const int64_t parallelism, const bool is_mv_refresh); - + static int obtain_snapshot( + const share::ObDDLTaskStatus next_task_status, + const uint64_t table_id, + const uint64_t target_table_id, + int64_t &snapshot_version, + bool &snapshot_held, + rootserver::ObDDLTask* task); + static int release_snapshot( + rootserver::ObDDLTask* task, + const uint64_t table_id, + const uint64_t target_table_id, + const int64_t snapshot_version); + static int check_and_cancel_single_replica_dag( + rootserver::ObDDLTask* task, + const uint64_t table_id, + const uint64_t target_table_id, + common::hash::ObHashMap& check_dag_exit_tablets_map, + int64_t &check_dag_exit_retry_cnt, + bool is_complement_data_dag, + bool &all_dag_exit); private: + static int hold_snapshot( + rootserver::ObDDLTask* task, + const uint64_t table_id, + const uint64_t target_table_id, + rootserver::ObRootService *root_service, + const int64_t snapshot_version); static int batch_check_tablet_checksum( const 
uint64_t tenant_id, const int64_t start_idx, diff --git a/src/share/ob_debug_sync_point.h b/src/share/ob_debug_sync_point.h index 0c3a9ca237..31756f7ce4 100755 --- a/src/share/ob_debug_sync_point.h +++ b/src/share/ob_debug_sync_point.h @@ -625,6 +625,8 @@ class ObString; ACT(BEFORE_REMOVE_BALANCE_TASK_HELPER,)\ ACT(BEFORE_CHOOSE_SOURCE,)\ ACT(AFTER_CHECK_LOG_NEED_REBUILD,)\ + ACT(REBUILD_VEC_INDEX_WAIT_CREATE_NEW_INDEX,)\ + ACT(REBUILD_VEC_INDEX_WAIT_DROP_OLD_INDEX,)\ ACT(BEFORE_SEND_ALTER_TABLE,)\ ACT(BEFOR_EXEC_REBUILD_TASK,)\ ACT(BEFORE_CREATE_HIDDEN_TABLE_IN_LOAD,)\ diff --git a/src/share/ob_domain_index_builder_util.cpp b/src/share/ob_domain_index_builder_util.cpp new file mode 100644 index 0000000000..27694e65e1 --- /dev/null +++ b/src/share/ob_domain_index_builder_util.cpp @@ -0,0 +1,112 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details.
+ */ + +#define USING_LOG_PREFIX COMMON +#include "ob_domain_index_builder_util.h" +#include "src/rootserver/ob_ddl_service.h" +#include "src/rootserver/ob_root_service.h" + +namespace oceanbase +{ +using namespace common; +using namespace obrpc; +using namespace share::schema; +namespace share +{ + +// Submit the create_aux_index RPC for one auxiliary table of a domain index and +// record the child DDL task it reports in the dependent task map. +// - `map` is created on demand with `map_num` buckets. +// - Nothing is sent when `task_submitted` is already true. +// - When the response has schema_generated_ set, `task_submitted` becomes true and +// `aux_table_id` is filled. A negative ddl_task_id_ yields res_task_id = OB_INVALID_ID; +// otherwise res_task_id is the child task id, which is inserted into `map` under a +// write lock on `lock` unless an entry for `aux_table_id` already exists. +int ObDomainIndexBuilderUtil::prepare_aux_table(bool &task_submitted, + uint64_t &aux_table_id, + int64_t &res_task_id, + const common::TCRWLock& lock, + const uint64_t& data_table_id, + const uint64_t& tenant_id, + const int64_t& task_id, + obrpc::ObCreateIndexArg& index_arg, + rootserver::ObRootService *root_service, + common::hash::ObHashMap &map, + const oceanbase::common::ObAddr &addr, + int map_num) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(root_service)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("root_service is nullptr", K(ret)); + } else { + int64_t ddl_rpc_timeout = 0; + rootserver::ObDDLService &ddl_service = root_service->get_ddl_service(); + obrpc::ObCommonRpcProxy *common_rpc = nullptr; + if (!map.created() && + OB_FAIL(map.create(map_num, lib::ObLabel("DepTasMap")))) { + LOG_WARN("create dependent task map failed", K(ret)); + } else if (OB_FALSE_IT(common_rpc = ddl_service.get_common_rpc())) { + } else if (OB_ISNULL(common_rpc)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("common rpc is nullptr", K(ret)); + } else if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout(tenant_id, + data_table_id, + ddl_rpc_timeout))) { + LOG_WARN("get ddl rpc timeout fail", K(ret)); + } else { + SMART_VARS_2((obrpc::ObCreateAuxIndexArg, arg), + (obrpc::ObCreateAuxIndexRes, res)) { + arg.tenant_id_ = tenant_id; + arg.exec_tenant_id_ = tenant_id; + arg.data_table_id_ = data_table_id; + arg.task_id_ = task_id; + if (task_submitted) { + // do nothing + } else if (OB_FAIL(arg.create_index_arg_.assign(index_arg))) { + LOG_WARN("failed
to assign create index arg", K(ret)); + } else if (OB_FAIL(common_rpc-> to(addr). + timeout(ddl_rpc_timeout).create_aux_index(arg, res))) { + LOG_WARN("create aux index failed", K(ret), K(arg)); + } else if (res.schema_generated_) { + task_submitted = true; + aux_table_id = res.aux_table_id_; + if (res.ddl_task_id_ < 0) { + // res_task_id is int64, while OB_INVALID_ID is uint64_max, so use "res.ddl_task_id_ < 0" rather than "res.ddl_task_id_ == OB_INVALID_ID" + // rowkey_vid/vid_rowkey table already exist and data is ready + res_task_id = OB_INVALID_ID; + } else { // need to wait data complement finish + res_task_id = res.ddl_task_id_; + TCWLockGuard guard(lock); + share::ObDomainDependTaskStatus status; + // check if child task is already added + if (OB_FAIL(map.get_refactored(aux_table_id, status))) { + if (OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + status.task_id_ = res.ddl_task_id_; + if (OB_FAIL(map.set_refactored(aux_table_id, status))) { + LOG_WARN("set dependent task map failed", K(ret), K(aux_table_id)); + } + } else { + LOG_WARN("get from dependent task map failed", K(ret)); + } + } + } + } + } // SMART_VAR + } + } + return ret; +} + +}//end namespace share +}//end namespace oceanbase \ No newline at end of file diff --git a/src/share/ob_domain_index_builder_util.h b/src/share/ob_domain_index_builder_util.h new file mode 100644 index 0000000000..faef79c6c9 --- /dev/null +++ b/src/share/ob_domain_index_builder_util.h @@ -0,0 +1,57 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_SHARE_DOMAIN_INDEX_BUILDER_UTIL_H_ +#define OCEANBASE_SHARE_DOMAIN_INDEX_BUILDER_UTIL_H_ + +#include "src/rootserver/ddl_task/ob_ddl_task.h" + +namespace oceanbase +{ +namespace share +{ + +struct ObDomainDependTaskStatus final +{ +public: +ObDomainDependTaskStatus() + : ret_code_(INT64_MAX), task_id_(0) +{} +~ObDomainDependTaskStatus() = default; +TO_STRING_KV(K_(task_id), K_(ret_code)); +public: +int64_t ret_code_; +int64_t task_id_; +}; + +class ObDomainIndexBuilderUtil +{ +public: + static int prepare_aux_table(bool &task_submitted, + uint64_t &aux_table_id, + int64_t &res_task_id, + const common::TCRWLock& lock, + const uint64_t& data_table_id, + const uint64_t& tenant_id, + const int64_t& task_id, + obrpc::ObCreateIndexArg& index_arg, + rootserver::ObRootService *root_service, + common::hash::ObHashMap &map, + const oceanbase::common::ObAddr &addr, + int map_num); +}; + + +}//end namespace share +}//end namespace oceanbase + +#endif //OCEANBASE_SHARE_DOMAIN_INDEX_BUILDER_UTIL_H_ diff --git a/src/share/ob_fts_index_builder_util.cpp b/src/share/ob_fts_index_builder_util.cpp index 259dbce404..58797b1b9c 100644 --- a/src/share/ob_fts_index_builder_util.cpp +++ b/src/share/ob_fts_index_builder_util.cpp @@ -1565,7 +1565,7 @@ int ObFtsIndexBuilderUtil::check_fts_or_multivalue_index_allowed( LOG_WARN("invalid argument", K(ret), K(data_schema)); } else if (data_schema.is_partitioned_table() && data_schema.is_heap_table()) { ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "create full-text or multi-value index on partition table without primary key"); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create full-text or multi-value or vector index on partition table without primary key"); } return ret; } diff --git a/src/share/ob_index_builder_util.cpp b/src/share/ob_index_builder_util.cpp index 9fc1d23f17..624cea9b30 100644 --- a/src/share/ob_index_builder_util.cpp +++ 
b/src/share/ob_index_builder_util.cpp @@ -13,6 +13,7 @@ #define USING_LOG_PREFIX SHARE #include "ob_index_builder_util.h" #include "ob_fts_index_builder_util.h" +#include "ob_vec_index_builder_util.h" #include "share/ob_define.h" #include "lib/container/ob_array_iterator.h" @@ -37,9 +38,10 @@ void ObIndexBuilderUtil::del_column_flags_and_default_value(ObColumnSchemaV2 &co { if ((column.is_generated_column() && !column.is_fulltext_column() && !column.is_spatial_generated_column() && !column.is_multivalue_generated_column() && - !column.is_multivalue_generated_array_column()) + !column.is_multivalue_generated_array_column() && + !column.is_vec_index_column()) /* vec index columns are excluded from the flag stripping below, like the other domain-index generated columns */ || column.is_identity_column()) { if (column.is_virtual_generated_column()) { column.del_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); @@ -152,6 +154,16 @@ int ObIndexBuilderUtil::add_column( LOG_WARN("set current default value failed", K(ret)); } } + if (column.is_vec_index_column()) { /* in the index table a vec index column is made visible, loses its virtual-generated flag and gets a default-constructed ObObj as default value */ + ObObj default_value; + column.del_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + column.set_is_hidden(false); + if (FAILEDx(column.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(column.set_cur_default_value(default_value, column.is_default_expr_v2_column()))) { + LOG_WARN("set current default value failed", K(ret)); + } + } if (OB_FAIL(ret)) { } else if (OB_FAIL(table_schema.add_column(column))) { LOG_WARN("add_column failed", K(column), K(ret)); @@ -370,6 +382,28 @@ int ObIndexBuilderUtil::set_index_table_columns( } // no matter what index col of data table is, columns of 4 aux fts table is fixed if (OB_FAIL(ret)) { + } else if (is_vec_index(arg.index_type_)) { /* vector index: dispatch to the column builder that matches the auxiliary table type */ + if (is_vec_rowkey_vid_type(arg.index_type_)) { + if (OB_FAIL(ObVecIndexBuilderUtil::set_vec_rowkey_vid_table_columns(arg, data_schema, index_schema))) { + LOG_WARN("fail to set vec rowkey vid table column", K(ret)); + } + } else if 
(is_vec_vid_rowkey_type(arg.index_type_)) { + if (OB_FAIL(ObVecIndexBuilderUtil::set_vec_vid_rowkey_table_columns(arg, data_schema, index_schema))) { + LOG_WARN("fail to set vec vid rowkey table column", K(ret)); + } + } else if (is_vec_delta_buffer_type(arg.index_type_)) { + if (OB_FAIL(ObVecIndexBuilderUtil::set_vec_delta_buffer_table_columns(arg, data_schema, index_schema))) { + LOG_WARN("fail to set vec delta buffer table column", K(ret)); + } + } else if (is_vec_index_id_type(arg.index_type_)) { + if (OB_FAIL(ObVecIndexBuilderUtil::set_vec_index_id_table_columns(arg, data_schema, index_schema))) { + LOG_WARN("fail to set vec index id table column", K(ret)); + } + } else if (is_vec_index_snapshot_data_type(arg.index_type_)) { + if (OB_FAIL(ObVecIndexBuilderUtil::set_vec_index_snapshot_data_table_columns(arg, data_schema, index_schema))) { + LOG_WARN("fail to set vec index snapshot data table column", K(ret)); + } + } } else if (is_fts_index(arg.index_type_) || is_multivalue_index(arg.index_type_)) { if (is_doc_rowkey_aux(arg.index_type_)) { @@ -691,6 +725,12 @@ int ObIndexBuilderUtil::adjust_expr_index_args( } else if (OB_FAIL(gen_columns.push_back(spatial_cols.at(1)))) { LOG_WARN("push back mbr column to gen columns failed", K(ret)); } + } else if (is_vec_index(arg.index_type_)) { /* vector index: check the data table is eligible, then let the vec builder adjust the index args */ + if (OB_FAIL(ObVecIndexBuilderUtil::check_vec_index_allowed(data_schema))) { + LOG_WARN("fail to check vector index allowed", K(ret)); + } else if (OB_FAIL(ObVecIndexBuilderUtil::adjust_vec_args(arg, data_schema, allocator, gen_columns))) { + LOG_WARN("failed to adjust vec index args", K(ret)); + } } else if (is_fts_index(arg.index_type_)) { if (OB_FAIL(ObFtsIndexBuilderUtil::check_fts_or_multivalue_index_allowed(data_schema))) { LOG_WARN("fail to check fts index allowed", K(ret)); diff --git a/src/share/ob_lob_access_utils.cpp b/src/share/ob_lob_access_utils.cpp index 4150e33460..6b2f84a74b 100644 --- a/src/share/ob_lob_access_utils.cpp +++ b/src/share/ob_lob_access_utils.cpp @@ -1248,6 
+1248,32 @@ int ObTextStringResult::init(int64_t res_len, ObIAllocator *allocator) return ret; } +int ObTextStringResult::init(const int64_t res_len, ObString &res_buffer) +{ /* Initializes this result on top of the caller-supplied res_buffer instead of allocating: checks is_init_ and type_, calls calc_buffer_len(res_len), requires res_buffer.length() == buff_len_, then calls fill_temp_lob_header(res_len). Returns OB_SUCCESS or an error code; is_init_ is set only on success. */ + int ret = OB_SUCCESS; + if (is_init_) { /* a second init on the same object is rejected */ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Lob: textstring result init already", K(ret), K(*this)); + } else if (!(ob_is_string_or_lob_type(type_) || is_lob_storage(type_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Lob: unexpected expr result type for textstring result", K(ret), K(type_)); + } else if (OB_FAIL(calc_buffer_len(res_len))) { /* presumably fills buff_len_, which is compared right below -- confirm in calc_buffer_len */ + LOG_WARN("fail to calc buffer len", K(ret), K(res_len)); + } else if (buff_len_ != res_buffer.length()) { /* the caller's buffer must match buff_len_ exactly; a larger buffer is rejected as well */ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Lob: res buffer is not enough", K(ret), K(buff_len_), K(res_buffer)); + } else { + buffer_ = res_buffer.ptr(); /* no copy: buffer_ aliases the caller's memory, and stays set even if the header fill below fails */ + if (OB_FAIL(fill_temp_lob_header(res_len))) { + LOG_WARN("Lob: fill_temp_lob_header failed", K(ret), K(type_), K(res_len)); + } else { + is_init_ = true; + } + } + + return ret; +} + int ObTextStringResult::copy(const ObLobLocatorV2 *loc) { int ret = OB_SUCCESS; diff --git a/src/share/ob_lob_access_utils.h b/src/share/ob_lob_access_utils.h index c53f113fb3..637dd5303c 100644 --- a/src/share/ob_lob_access_utils.h +++ b/src/share/ob_lob_access_utils.h @@ -252,6 +252,7 @@ public: // 1. all lobs created by this class should be temp lobs // 2. 
if has_lob_header_ is false, the text result should be 4.0 compatible int init(const int64_t res_len, ObIAllocator *allocator = NULL); + int init(const int64_t res_len, ObString &res_buffer); // copy existent loc to result int copy(const ObLobLocatorV2 *loc); @@ -288,9 +289,10 @@ public: static int calc_inrow_templob_len(uint32 inrow_data_len, int64_t &templob_len); static int64_t calc_inrow_templob_locator_len(); static int fill_inrow_templob_header(const int64_t inrow_data_len, char *buf, int64_t buf_len); + int calc_buffer_len(const int64_t res_len); + OB_INLINE int64_t get_buff_len() { return buff_len_; } protected: - int calc_buffer_len(const int64_t res_len); int fill_temp_lob_header(const int64_t res_len); protected: diff --git a/src/share/ob_rpc_struct.cpp b/src/share/ob_rpc_struct.cpp index 77d183c6c3..2911f1fe88 100644 --- a/src/share/ob_rpc_struct.cpp +++ b/src/share/ob_rpc_struct.cpp @@ -974,6 +974,134 @@ OB_SERIALIZE_MEMBER((ObDropResourceUnitArg, ObDDLArg), unit_name_, if_exist_); +bool ObVectorIndexRebuildArg::is_valid() const +{ /* valid when the inherited exec_tenant_id_, tenant_id_, data_table_id_ and index_id_table_id_ are all set */ + return OB_INVALID_TENANT_ID != exec_tenant_id_ && + OB_INVALID_TENANT_ID != tenant_id_ && + OB_INVALID_ID != data_table_id_ && + OB_INVALID_ID != index_id_table_id_; +} + +void ObVectorIndexRebuildArg::reset() +{ /* restores every member to its invalid/empty value, resets the allocator, then resets the ObDDLArg base */ + tenant_id_ = OB_INVALID_TENANT_ID; + data_table_id_ = OB_INVALID_ID; + index_id_table_id_ = OB_INVALID_ID; + session_id_ = OB_INVALID_ID; + sql_mode_ = 0; + tz_info_.reset(); + tz_info_wrap_.reset(); + for (int64_t i = 0; i < ObNLSFormatEnum::NLS_MAX; ++i) { + nls_formats_[i].reset(); + } + allocator_.reset(); + ObDDLArg::reset(); +} + +int ObVectorIndexRebuildArg::assign(const ObVectorIndexRebuildArg &other) +{ /* Copies other into this object: plain ids and sql_mode_ by value, the ObDDLArg base via ObDDLArg::assign, tz info via assign/deep_copy, and each nls format string into allocator_. Returns the first error encountered. */ + int ret = OB_SUCCESS; + tenant_id_ = other.tenant_id_; + data_table_id_ = other.data_table_id_; + index_id_table_id_ = other.index_id_table_id_; + session_id_ = other.session_id_; + sql_mode_ = other.sql_mode_; + if (OB_FAIL(ObDDLArg::assign(other))) { /* the base state must be copied too: is_valid() checks the inherited exec_tenant_id_ */ + LOG_WARN("fail to assign ddl arg", KR(ret)); + } else if (OB_FAIL(tz_info_.assign(other.tz_info_))) { + LOG_WARN("fail to assign tz info", 
KR(ret), "tz_info", other.tz_info_); + } else if (OB_FAIL(tz_info_wrap_.deep_copy(other.tz_info_wrap_))) { + LOG_WARN("fail to deep copy tz info wrap", KR(ret), "tz_info_wrap", other.tz_info_wrap_); + } + for (int64_t i = 0; OB_SUCC(ret) && i < ObNLSFormatEnum::NLS_MAX; i++) { + if (OB_FAIL(ob_write_string(allocator_, other.nls_formats_[i], nls_formats_[i]))) { + LOG_WARN("fail to deep copy nls format", KR(ret), K(i), "nls_format", other.nls_formats_[i]); + } + } + return ret; +} + +OB_DEF_SERIALIZE(ObVectorIndexRebuildArg) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), KPC(this)); + } else { + BASE_SER((, ObDDLArg)); + LST_DO_CODE(OB_UNIS_ENCODE, + tenant_id_, + data_table_id_, + index_id_table_id_, + session_id_, + sql_mode_, + tz_info_, + tz_info_wrap_); + OB_UNIS_ENCODE_ARRAY(nls_formats_, ObNLSFormatEnum::NLS_MAX); + } + return ret; +} + +OB_DEF_DESERIALIZE(ObVectorIndexRebuildArg) +{ + int ret = OB_SUCCESS; + reset(); + int64_t nls_formats_count = -1; + ObString nls_formats[ObNLSFormatEnum::NLS_MAX]; + BASE_DESER((, ObDDLArg)); + LST_DO_CODE(OB_UNIS_DECODE, + tenant_id_, + data_table_id_, + index_id_table_id_, + session_id_, + sql_mode_, + tz_info_, + tz_info_wrap_); + OB_UNIS_DECODE(nls_formats_count); + if (OB_SUCC(ret)) { + if (OB_UNLIKELY(ObNLSFormatEnum::NLS_MAX != nls_formats_count)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nls formats count", KR(ret), K(nls_formats_count)); + } + OB_UNIS_DECODE_ARRAY(nls_formats, nls_formats_count); + for (int64_t i = 0; OB_SUCC(ret) && i < nls_formats_count; i++) { + if (OB_FAIL(ob_write_string(allocator_, nls_formats[i], nls_formats_[i]))) { + LOG_WARN("fail to deep copy nls format", KR(ret), K(i), K(nls_formats[i])); + } + } + } + return ret; +} + +OB_DEF_SERIALIZE_SIZE(ObVectorIndexRebuildArg) +{ + int ret = OB_SUCCESS; + int64_t len = 0; + if (OB_UNLIKELY(!is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid 
argument", KR(ret), KPC(this)); + } else { + BASE_ADD_LEN((, ObDDLArg)); + LST_DO_CODE(OB_UNIS_ADD_LEN, + tenant_id_, + data_table_id_, + index_id_table_id_, + session_id_, + sql_mode_, + tz_info_, + tz_info_wrap_); + OB_UNIS_ADD_LEN_ARRAY(nls_formats_, ObNLSFormatEnum::NLS_MAX); + } + if (OB_FAIL(ret)) { + len = -1; + } + return len; +} + +OB_SERIALIZE_MEMBER(ObVectorIndexRebuildRes, + task_id_, + trace_id_); + bool ObMViewCompleteRefreshArg::is_valid() const { bool bret = OB_INVALID_TENANT_ID != exec_tenant_id_ && @@ -3271,6 +3399,7 @@ DEF_TO_STRING(ObCreateIndexArg) K_(local_session_var), K_(exist_all_column_group), K_(index_cgs), + K_(vidx_refresh_info), K_(is_rebuild_index)); J_OBJ_END(); return pos; @@ -3299,9 +3428,10 @@ OB_SERIALIZE_MEMBER((ObCreateIndexArg, ObIndexArg), exist_all_column_group_, index_cgs_, vidx_refresh_info_, - is_rebuild_index_); + is_rebuild_index_ + ); -int ObGenerateAuxIndexSchemaArg::assign(const ObGenerateAuxIndexSchemaArg &other) +int ObCreateAuxIndexArg::assign(const ObCreateAuxIndexArg &other) { int ret = OB_SUCCESS; if (OB_FAIL(create_index_arg_.assign(other.create_index_arg_))) { @@ -3313,12 +3443,13 @@ int ObGenerateAuxIndexSchemaArg::assign(const ObGenerateAuxIndexSchemaArg &other return ret; } -OB_SERIALIZE_MEMBER((ObGenerateAuxIndexSchemaArg, ObDDLArg), +OB_SERIALIZE_MEMBER((ObCreateAuxIndexArg, ObDDLArg), tenant_id_, data_table_id_, create_index_arg_); -OB_SERIALIZE_MEMBER(ObGenerateAuxIndexSchemaRes, +OB_SERIALIZE_MEMBER(ObCreateAuxIndexRes, aux_table_id_, + ddl_task_id_, schema_generated_); bool ObAlterIndexArg::is_valid() const diff --git a/src/share/ob_rpc_struct.h b/src/share/ob_rpc_struct.h index 1a7b8bbdc8..0c5101c38c 100644 --- a/src/share/ob_rpc_struct.h +++ b/src/share/ob_rpc_struct.h @@ -2003,6 +2003,73 @@ public: share::ObTaskId trace_id_; }; +struct ObVectorIndexRebuildArg final : public ObDDLArg +{ + OB_UNIS_VERSION(1); +public: + ObVectorIndexRebuildArg() + : ObDDLArg(), + tenant_id_(OB_INVALID_TENANT_ID), + 
data_table_id_(OB_INVALID_ID), + index_id_table_id_(OB_INVALID_ID), + session_id_(OB_INVALID_ID), + sql_mode_(0), + tz_info_(), + tz_info_wrap_(), + nls_formats_() + { + } + ~ObVectorIndexRebuildArg() = default; + bool is_valid() const; + void reset(); + int assign(const ObVectorIndexRebuildArg &other); + INHERIT_TO_STRING_KV("ObDDLArg", ObDDLArg, + K_(tenant_id), + K_(data_table_id), + K_(index_id_table_id), + K_(session_id), + K_(sql_mode), + K_(tz_info), + K_(tz_info_wrap), + "nls_formats", common::ObArrayWrap(nls_formats_, common::ObNLSFormatEnum::NLS_MAX)); +public: + uint64_t tenant_id_; + uint64_t data_table_id_; + uint64_t index_id_table_id_; + uint64_t session_id_; + ObSQLMode sql_mode_; + + common::ObArenaAllocator allocator_; + common::ObTimeZoneInfo tz_info_; + common::ObTimeZoneInfoWrap tz_info_wrap_; + common::ObString nls_formats_[common::ObNLSFormatEnum::NLS_MAX]; +}; + +struct ObVectorIndexRebuildRes final +{ + OB_UNIS_VERSION(1); +public: + ObVectorIndexRebuildRes() : task_id_(0), trace_id_() {} + ~ObVectorIndexRebuildRes() = default; + void reset() + { + task_id_ = 0; + trace_id_.reset(); + } + int assign(const ObVectorIndexRebuildRes &other) + { + if (this != &other) { + task_id_ = other.task_id_; + trace_id_ = other.trace_id_; + } + return OB_SUCCESS; + } + TO_STRING_KV(K_(task_id), K_(trace_id)); +public: + int64_t task_id_; + share::ObTaskId trace_id_; +}; + struct ObMViewCompleteRefreshArg final : public ObDDLArg { OB_UNIS_VERSION(1); @@ -2720,6 +2787,7 @@ public: DECLARE_VIRTUAL_TO_STRING; inline bool is_spatial_index() const { return ObSimpleTableSchemaV2::is_spatial_index(index_type_); } inline bool is_multivalue_index() const { return is_multivalue_index_aux(index_type_); } + inline bool is_vec_index() const { return ObSimpleTableSchemaV2::is_vec_index(index_type_); } //todo @qilu:only for each_cg now, when support customized cg ,refine this typedef common::ObSEArray ObCGColumnList; @@ -2780,22 +2848,22 @@ public: bool is_rebuild_index_; 
}; -struct ObGenerateAuxIndexSchemaArg : public ObDDLArg +struct ObCreateAuxIndexArg : public ObDDLArg { OB_UNIS_VERSION_V(1); public: - ObGenerateAuxIndexSchemaArg() + ObCreateAuxIndexArg() : tenant_id_(OB_INVALID_TENANT_ID), data_table_id_(OB_INVALID_ID) {} - ~ObGenerateAuxIndexSchemaArg() {} + ~ObCreateAuxIndexArg() {} bool is_valid() const { return tenant_id_ != OB_INVALID_TENANT_ID && - data_table_id_ != OB_INVALID_ID&& + data_table_id_ != OB_INVALID_ID && create_index_arg_.is_valid(); } - int assign(const ObGenerateAuxIndexSchemaArg &other); + int assign(const ObCreateAuxIndexArg &other); void reset() { tenant_id_ = OB_INVALID_TENANT_ID; @@ -2810,31 +2878,35 @@ public: ObCreateIndexArg create_index_arg_; }; -struct ObGenerateAuxIndexSchemaRes final +struct ObCreateAuxIndexRes final { OB_UNIS_VERSION_V(1); public: - ObGenerateAuxIndexSchemaRes() + ObCreateAuxIndexRes() : aux_table_id_(OB_INVALID_ID), + ddl_task_id_(OB_INVALID_ID), schema_generated_(false) {} - ~ObGenerateAuxIndexSchemaRes() {} - int assign(const ObGenerateAuxIndexSchemaRes &other) + ~ObCreateAuxIndexRes() {} + int assign(const ObCreateAuxIndexRes &other) { int ret = OB_SUCCESS; aux_table_id_ = other.aux_table_id_; + ddl_task_id_ = other.ddl_task_id_; schema_generated_ = other.schema_generated_; return ret; } void reset() { aux_table_id_ = OB_INVALID_ID; + ddl_task_id_ = OB_INVALID_ID; schema_generated_ = false; } - TO_STRING_KV(K(aux_table_id_), K(schema_generated_)); + TO_STRING_KV(K(aux_table_id_), K(ddl_task_id_), K(schema_generated_)); public: uint64_t aux_table_id_; + int64_t ddl_task_id_; bool schema_generated_; }; diff --git a/src/share/ob_srv_rpc_proxy.h b/src/share/ob_srv_rpc_proxy.h index 006a98f10d..d57ef955a5 100644 --- a/src/share/ob_srv_rpc_proxy.h +++ b/src/share/ob_srv_rpc_proxy.h @@ -149,6 +149,7 @@ public: RPC_S(PR5 calc_column_checksum_request, OB_CALC_COLUMN_CHECKSUM_REQUEST, (ObCalcColumnChecksumRequestArg), obrpc::ObCalcColumnChecksumRequestRes); RPC_AP(PR5 
build_ddl_single_replica_request, OB_DDL_BUILD_SINGLE_REPLICA_REQUEST, (obrpc::ObDDLBuildSingleReplicaRequestArg), obrpc::ObDDLBuildSingleReplicaRequestResult); RPC_S(PR5 check_and_cancel_ddl_complement_dag, OB_CHECK_AND_CANCEL_DDL_COMPLEMENT_DAG, (ObDDLBuildSingleReplicaRequestArg), Bool); + RPC_S(PR5 check_and_cancel_delete_lob_meta_row_dag, OB_CHECK_AND_CANCEL_DELETE_LOB_META_ROW_DAG, (ObDDLBuildSingleReplicaRequestArg), Bool); RPC_S(PR5 fetch_tablet_autoinc_seq_cache, OB_FETCH_TABLET_AUTOINC_SEQ_CACHE, (obrpc::ObFetchTabletSeqArg), obrpc::ObFetchTabletSeqRes); RPC_AP(PR5 batch_get_tablet_autoinc_seq, OB_BATCH_GET_TABLET_AUTOINC_SEQ, (obrpc::ObBatchGetTabletAutoincSeqArg), obrpc::ObBatchGetTabletAutoincSeqRes); RPC_AP(PR5 batch_set_tablet_autoinc_seq, OB_BATCH_SET_TABLET_AUTOINC_SEQ, (obrpc::ObBatchSetTabletAutoincSeqArg), obrpc::ObBatchSetTabletAutoincSeqRes); diff --git a/src/share/ob_vec_index_builder_util.cpp b/src/share/ob_vec_index_builder_util.cpp new file mode 100644 index 0000000000..5f474b4b00 --- /dev/null +++ b/src/share/ob_vec_index_builder_util.cpp @@ -0,0 +1,2325 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX COMMON +#include +#include "ob_vec_index_builder_util.h" +#include "ob_index_builder_util.h" +#include "sql/resolver/ddl/ob_ddl_resolver.h" +#include "sql/engine/ob_exec_context.h" +#include "sql/resolver/expr/ob_raw_expr_util.h" + +namespace oceanbase +{ +using namespace common; +using namespace obrpc; +using namespace share::schema; + +namespace share +{ + +const char * ObVecIndexBuilderUtil::ROWKEY_VID_TABLE_NAME = "rowkey_vid_table"; +const char * ObVecIndexBuilderUtil::VID_ROWKEY_TABLE_NAME = "vid_rowkey_table"; +const char * ObVecIndexBuilderUtil::DELTA_BUFFER_TABLE_NAME_SUFFIX = ""; +const char * ObVecIndexBuilderUtil::INDEX_ID_TABLE_NAME_SUFFIX = "_index_id_table"; +const char * ObVecIndexBuilderUtil::SNAPSHOT_DATA_TABLE_NAME_SUFFIX = "_index_snapshot_data_table"; + +int ObVecIndexBuilderUtil::append_vec_rowkey_vid_arg( + const ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list) +{ + int ret = OB_SUCCESS; + ObCreateIndexArg vec_rowkey_vid_arg; + ObString empty_domain_index_name; + if (OB_ISNULL(allocator) || !(is_vec_index(index_arg.index_type_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret), K(index_arg.index_type_)); + } else if (OB_FAIL(vec_rowkey_vid_arg.assign(index_arg))) { + LOG_WARN("failed to assign to vec rowkey vid arg", K(ret)); + } else if (FALSE_IT(vec_rowkey_vid_arg.index_type_ = INDEX_TYPE_VEC_ROWKEY_VID_LOCAL)) { + } else if (OB_FAIL(generate_vec_index_name(allocator, + vec_rowkey_vid_arg.index_type_, + empty_domain_index_name, + vec_rowkey_vid_arg.index_name_))) { + LOG_WARN("failed to generate vec index name", K(ret)); + } else if (OB_FAIL(index_arg_list.push_back(vec_rowkey_vid_arg))) { + LOG_WARN("failed to push back vec rowkey vid arg", K(ret)); + } + return ret; +} + +int ObVecIndexBuilderUtil::append_vec_vid_rowkey_arg( + const obrpc::ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list) +{ + int ret = OB_SUCCESS; 
+ ObCreateIndexArg vec_vid_rowkey_arg; + ObString empty_domain_index_name; + if (OB_ISNULL(allocator) || !(is_vec_index(index_arg.index_type_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret), K(index_arg.index_type_)); + } else if (OB_FAIL(vec_vid_rowkey_arg.assign(index_arg))) { + LOG_WARN("failed to assign to vec vid rowkey arg", K(ret)); + } else if (FALSE_IT(vec_vid_rowkey_arg.index_type_ = INDEX_TYPE_VEC_VID_ROWKEY_LOCAL)) { + } else if (OB_FAIL(generate_vec_index_name(allocator, + vec_vid_rowkey_arg.index_type_, + empty_domain_index_name, + vec_vid_rowkey_arg.index_name_))) { + LOG_WARN("failed to generate vec index name", K(ret)); + } else if (OB_FAIL(index_arg_list.push_back(vec_vid_rowkey_arg))) { + LOG_WARN("failed to push back vec vid rowkey arg", K(ret)); + } + return ret; +} + +int ObVecIndexBuilderUtil::append_vec_delta_buffer_arg( + const obrpc::ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + const sql::ObSQLSessionInfo *session_info, + ObIArray &index_arg_list) +{ + int ret = OB_SUCCESS; + ObCreateIndexArg vec_delta_buffer_arg; + char* buf = nullptr; + int64_t pos = 0; + ObString domain_index_name = index_arg.index_name_; + if (OB_ISNULL(allocator) || OB_ISNULL(session_info) || !(is_vec_index(index_arg.index_type_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret), K(index_arg.index_type_)); + } else if (OB_ISNULL(buf = reinterpret_cast(allocator->alloc(sizeof(char) * OB_MAX_PROC_ENV_LENGTH)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc buffer", KR(ret), K(OB_MAX_PROC_ENV_LENGTH)); + } else if (OB_FAIL(ObExecEnv::gen_exec_env(*session_info, buf, OB_MAX_PROC_ENV_LENGTH, pos))) { + LOG_WARN("fail to gen exec env", KR(ret)); + } else if (OB_FAIL(vec_delta_buffer_arg.assign(index_arg))) { + LOG_WARN("failed to assign to vec delta buffer arg", K(ret)); + } else if (FALSE_IT(vec_delta_buffer_arg.index_type_ = INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL)) { + } else if 
(OB_FAIL(generate_vec_index_name(allocator, + vec_delta_buffer_arg.index_type_, + domain_index_name, + vec_delta_buffer_arg.index_name_))) { + LOG_WARN("failed to generate vec index name", K(ret)); + } else if (FALSE_IT(vec_delta_buffer_arg.vidx_refresh_info_.exec_env_.assign_ptr(buf, pos))) { + } else if (OB_FAIL(index_arg_list.push_back(vec_delta_buffer_arg))) { + LOG_WARN("failed to push back vec delta buffer arg", K(ret)); + } + return ret; +} + + +int ObVecIndexBuilderUtil::append_vec_index_id_arg( + const obrpc::ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list) +{ + int ret = OB_SUCCESS; + ObCreateIndexArg vec_index_id_arg; + ObString domain_index_name = index_arg.index_name_; + if (OB_ISNULL(allocator) || !(is_vec_index(index_arg.index_type_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret), K(index_arg.index_type_)); + } else if (OB_FAIL(vec_index_id_arg.assign(index_arg))) { + LOG_WARN("failed to assign to vec index id arg", K(ret)); + } else if (FALSE_IT(vec_index_id_arg.index_type_ = INDEX_TYPE_VEC_INDEX_ID_LOCAL)) { + } else if (OB_FAIL(generate_vec_index_name(allocator, + vec_index_id_arg.index_type_, + domain_index_name, + vec_index_id_arg.index_name_))) { + LOG_WARN("failed to generate vec index name", K(ret)); + } else if (OB_FAIL(index_arg_list.push_back(vec_index_id_arg))) { + LOG_WARN("failed to push back vec index id arg", K(ret)); + } + return ret; +} + + +int ObVecIndexBuilderUtil::append_vec_index_snapshot_data_arg( + const obrpc::ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list) +{ + int ret = OB_SUCCESS; + ObCreateIndexArg vec_index_snapshot_data_arg; + ObString domain_index_name = index_arg.index_name_; + if (OB_ISNULL(allocator) || !(is_vec_index(index_arg.index_type_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret), K(index_arg.index_type_)); + } else if (OB_FAIL(vec_index_snapshot_data_arg.assign(index_arg))) { + 
LOG_WARN("failed to assign to snapshot data arg", K(ret)); + } else if (FALSE_IT(vec_index_snapshot_data_arg.index_type_ = INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL)) { + } else if (OB_FAIL(generate_vec_index_name(allocator, + vec_index_snapshot_data_arg.index_type_, + domain_index_name, + vec_index_snapshot_data_arg.index_name_))) { + LOG_WARN("failed to generate vec index name", K(ret)); + } else if (OB_FAIL(index_arg_list.push_back(vec_index_snapshot_data_arg))) { + LOG_WARN("failed to push back vec snapshot data arg", K(ret)); + } + return ret; +} + +int ObVecIndexBuilderUtil::check_vec_index_allowed( + ObTableSchema &data_schema) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!data_schema.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema)); + } else if (data_schema.is_partitioned_table() && data_schema.is_heap_table()) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create vector index on partition table without primary key"); + } + return ret; +} + +int ObVecIndexBuilderUtil::generate_vec_index_name( + ObIAllocator *allocator, + const share::schema::ObIndexType type, + const ObString &index_name, + ObString &new_index_name) +{ + int ret = OB_SUCCESS; + char *name_buf = nullptr; + if (OB_ISNULL(allocator)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is nullptr", K(ret)); + } else if (!share::schema::is_vec_index(type)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(type)); + } else if (OB_ISNULL(name_buf = static_cast(allocator->alloc(OB_MAX_TABLE_NAME_LENGTH)))) { + ret = common::OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc mem", K(ret)); + } else { + int64_t pos = 0; + if (share::schema::is_vec_rowkey_vid_type(type)) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(name_buf, + OB_MAX_TABLE_NAME_LENGTH, + pos, + "%s", + ROWKEY_VID_TABLE_NAME))) { + LOG_WARN("failed to print", K(ret)); + } + } else if (share::schema::is_vec_vid_rowkey_type(type)) { + 
if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(name_buf, + OB_MAX_TABLE_NAME_LENGTH, + pos, + "%s", + VID_ROWKEY_TABLE_NAME))) { + LOG_WARN("failed to print", K(ret)); + } + } else if (share::schema::is_vec_delta_buffer_type(type)) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(name_buf, + OB_MAX_TABLE_NAME_LENGTH, + pos, + "%.*s%s", + index_name.length(), + index_name.ptr(), + DELTA_BUFFER_TABLE_NAME_SUFFIX))) { + LOG_WARN("failed to print", K(ret)); + } + } else if (share::schema::is_vec_index_id_type(type)) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(name_buf, + OB_MAX_TABLE_NAME_LENGTH, + pos, + "%.*s%s", + index_name.length(), + index_name.ptr(), + INDEX_ID_TABLE_NAME_SUFFIX))) { + LOG_WARN("failed to print", K(ret)); + } + } else if (share::schema::is_vec_index_snapshot_data_type(type)) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(name_buf, + OB_MAX_TABLE_NAME_LENGTH, + pos, + "%.*s%s", + index_name.length(), + index_name.ptr(), + SNAPSHOT_DATA_TABLE_NAME_SUFFIX))) { + LOG_WARN("failed to print", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, unknown vec index type", K(ret), K(type)); + } + if (OB_SUCC(ret)) { + new_index_name.assign_ptr(name_buf, static_cast(pos)); + } else { + LOG_WARN("failed to generate vec aux index name", K(ret)); + } + } + return ret; +} + +int ObVecIndexBuilderUtil::set_vec_rowkey_vid_table_columns( + const ObCreateIndexArg &arg, + const ObTableSchema &data_schema, + ObTableSchema &index_schema) +{ + int ret = OB_SUCCESS; + if (!data_schema.is_valid() || + arg.store_columns_.count() != 1 || /* vid column */ + !share::schema::is_vec_rowkey_vid_type(arg.index_type_)) { + // expect vid column in store columns + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), + K(data_schema), K(arg.store_columns_.count()), K(arg.index_type_)); + } + HEAP_VAR(ObRowDesc, row_desc) { + // 1. 
add rowkey_vid_table rowkey columns + for (int64_t i = 0; OB_SUCC(ret) && i < arg.index_columns_.count(); ++i) { + const ObColumnSchemaV2 *rowkey_column = nullptr; + const ObColumnSortItem &rowkey_col_item = arg.index_columns_.at(i); + const ObString &rowkey_col_name = rowkey_col_item.column_name_; + if (rowkey_col_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(rowkey_col_name)); + } else if (OB_ISNULL(rowkey_column = data_schema.get_column_schema(rowkey_col_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, + rowkey_col_name.length(), + rowkey_col_name.ptr()); + LOG_WARN("get_column_schema failed", + "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", rowkey_col_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(rowkey_column, + true/*is_index_column*/, + true/*is_rowkey*/, + arg.index_columns_.at(i).order_type_, + row_desc, + index_schema, + false/*is_hidden*/, + false/*is_specified_storing_col*/))) { + LOG_WARN("add column failed", "rowkey_column", *rowkey_column, + "rowkey_order_type", arg.index_columns_.at(i).order_type_, + K(row_desc), K(ret)); + } + } + if (OB_FAIL(ret)) { + } else { + index_schema.set_rowkey_column_num(row_desc.get_column_num()); + index_schema.set_index_column_num(row_desc.get_column_num()); + // 2. 
add rowkey_vid_table vid column + const ObColumnSchemaV2 *vid_column = nullptr; + const ObString &vid_col_name = arg.store_columns_.at(0); + // is_rowkey is false, order_in_rowkey will not be used + const ObOrderType order_in_rowkey = ObOrderType::DESC; + if (vid_col_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(vid_col_name)); + } else if (OB_ISNULL(vid_column = data_schema.get_column_schema(vid_col_name))) { + ret = OB_ERR_BAD_FIELD_ERROR; + LOG_WARN("get_column_schema failed", + "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", vid_col_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(vid_column, + false/*is_index_column*/, + false/*is_rowkey*/, + order_in_rowkey, + row_desc, + index_schema, + false/*is_hidden*/, + true/*is_specified_storing_col*/))) { + LOG_WARN("add_column failed", "vid_column", *vid_column, K(row_desc), K(ret)); + } else if (OB_FAIL(index_schema.sort_column_array_by_column_id())) { + LOG_WARN("failed to sort column", K(ret)); + } else { + LOG_INFO("succeed to set rowkey vid table columns", K(index_schema)); + } + } + } + LOG_DEBUG("finish set rowkey vid table column", K(ret), K(arg), K(index_schema), K(data_schema)); + return ret; +} + +int ObVecIndexBuilderUtil::set_vec_vid_rowkey_table_columns( + const ObCreateIndexArg &arg, + const ObTableSchema &data_schema, + ObTableSchema &index_schema) +{ /* Fills index_schema for the vid_rowkey auxiliary table: the single index column of arg (the vid column) is added as the rowkey, then every rowkey column of data_schema is added as a non-rowkey column, and the column array is sorted by column id. Returns OB_INVALID_ARGUMENT when data_schema is invalid, arg does not carry exactly one index column, or arg.index_type_ is not the vid_rowkey type. */ + int ret = OB_SUCCESS; + if (!data_schema.is_valid() || + arg.index_columns_.count() != 1 || + !share::schema::is_vec_vid_rowkey_type(arg.index_type_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), + K(data_schema), K(arg.index_columns_.count()), K(arg.index_type_)); + } + HEAP_VAR(ObRowDesc, row_desc) { + // 1. 
add vid_rowkey_table vid id column + const ObColumnSchemaV2 *vid_column = nullptr; + const ObColumnSortItem &vid_col_item = arg.index_columns_.at(0); /* NOTE(review): evaluated before ret is checked, so at(0) also runs when the count check above failed (e.g. index_columns_ is empty) -- confirm at() tolerates that, or guard this access */ + const ObString &vid_col_name = vid_col_item.column_name_; + if (OB_FAIL(ret)) { /* argument check above failed: skip all schema changes */ + } else if (vid_col_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(vid_col_name)); + } else if (OB_ISNULL(vid_column = data_schema.get_column_schema(vid_col_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, + vid_col_name.length(), vid_col_name.ptr()); + LOG_WARN("get_column_schema failed", "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", vid_col_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(vid_column, + true/*is_index_column*/, + true/*is_rowkey*/, + vid_col_item.order_type_, + row_desc, + index_schema, + false/*is_hidden*/, + false/*is_specified_storing_col*/))) { + LOG_WARN("add column failed ", "vid_column", *vid_column, + "rowkey_order_type", vid_col_item.order_type_, K(row_desc), K(ret)); + } else { + index_schema.set_rowkey_column_num(row_desc.get_column_num()); /* at this point row_desc holds only the vid column, so the rowkey/index column counts are taken from it */ + index_schema.set_index_column_num(row_desc.get_column_num()); + + // 2. 
add vid_rowkey_table rowkey column + const ObColumnSchemaV2 *rowkey_column = nullptr; + const ObRowkeyInfo &rowkey_info = data_schema.get_rowkey_info(); + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_info.get_size(); ++i) { + uint64_t column_id = OB_INVALID_ID; + if (OB_FAIL(rowkey_info.get_column_id(i, column_id))) { + LOG_WARN("get_column_id failed", "index", i, K(ret)); + } else if (OB_ISNULL(rowkey_column = data_schema.get_column_schema(column_id))) { + ret = OB_ERR_BAD_FIELD_ERROR; + LOG_WARN("get_column_schema failed", "table_id", data_schema.get_table_id(), + K(column_id), K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(rowkey_column, + false/*is_index_column*/, + false/*is_rowkey*/, + rowkey_column->get_order_in_rowkey(), + row_desc, + index_schema, + false/*is_hidden*/, + false/*is_specified_storing_col*/))) { + LOG_WARN("add column failed", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(index_schema.sort_column_array_by_column_id())) { + LOG_WARN("failed to sort column", K(ret)); + } else { + LOG_INFO("succeed to set vec vid rowkey table columns", K(index_schema)); + } + } + } + LOG_DEBUG("finish set vec vid rowkey table columns", K(ret), K(arg), K(index_schema), K(data_schema)); + return ret; +} + +/* + bigint char(1) vector_type + vid type vector +*/ +int ObVecIndexBuilderUtil::set_vec_delta_buffer_table_columns( + const ObCreateIndexArg &arg, + const ObTableSchema &data_schema, + ObTableSchema &index_schema) +{ + int ret = OB_SUCCESS; + if (!data_schema.is_valid() || + (!share::schema::is_vec_delta_buffer_type(arg.index_type_)) || + arg.index_columns_.count() != 2 || /*vid, type column */ + arg.store_columns_.count() != 1) { /* vector column */ /* not counting the pseudo column ora_rowscn */ + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), K(arg.index_type_), + K(arg.index_columns_.count()), K(arg.store_columns_.count()), + K(arg.index_columns_), K(arg.store_columns_)); + } + HEAP_VAR(ObRowDesc, row_desc) { + // 1. 
add delta_buffer_table vid, type column + for (int64_t i = 0; OB_SUCC(ret) && i < arg.index_columns_.count(); ++i) { + const ObColumnSchemaV2 *vec_column = nullptr; + const ObColumnSortItem &vec_col_item = arg.index_columns_.at(i); + const ObString &vec_col_name = vec_col_item.column_name_; + if (vec_col_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(vec_col_name)); + } else if (OB_ISNULL(vec_column = data_schema.get_column_schema(vec_col_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, + vec_col_name.length(), vec_col_name.ptr()); + LOG_WARN("get_column_schema failed", + "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", vec_col_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(vec_column, + true/*is_index_column*/, + true/*is_rowkey*/, + arg.index_columns_.at(i).order_type_, + row_desc, + index_schema, + false/*is_hidden*/, + false/*is_specified_storing_col*/))) { + LOG_WARN("add column failed", "vec_column", *vec_column, + "rowkey_order_type", arg.index_columns_.at(i).order_type_, + K(row_desc), K(ret)); + } + } + if (OB_SUCC(ret)) { + index_schema.set_rowkey_column_num(row_desc.get_column_num()); + index_schema.set_index_column_num(row_desc.get_column_num()); + } + // 2. 
add delta_buffer_table vector column + for (int64_t i = 0; OB_SUCC(ret) && i < arg.store_columns_.count(); ++i) { + const ObColumnSchemaV2 *store_column = nullptr; + const ObString &store_column_name = arg.store_columns_.at(i); + // is_rowkey is false, order_in_rowkey will not be used + const ObOrderType order_in_rowkey = ObOrderType::DESC; + if (OB_UNLIKELY(store_column_name.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(store_column_name)); + } else if (OB_ISNULL(store_column = data_schema.get_column_schema(store_column_name))) { + ret = OB_ERR_BAD_FIELD_ERROR; + LOG_WARN("get_column_schema failed", "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", store_column_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(store_column, + false/*is_index_column*/, + false/*is_rowkey*/, + order_in_rowkey, + row_desc, + index_schema, + false/*is_hidden*/, + true/*is_specified_storing_col*/))) { + LOG_WARN("add_column failed", K(store_column), K(row_desc), K(ret)); + } + } + // 3. 
add part key column + if (OB_FAIL(ret)) { + } else if (OB_FAIL(set_part_key_columns(data_schema, index_schema))) { + LOG_WARN("fail to generate part key columns", K(ret)); + } + // + if (FAILEDx(index_schema.sort_column_array_by_column_id())) { + LOG_WARN("failed to sort column", K(ret)); + } else { + LOG_INFO("succeed to set vec delta buffer table columns", K(index_schema)); + } + } + LOG_DEBUG("finish set vec delta buffer table column", K(ret), K(arg), K(index_schema), K(data_schema)); + return ret; +} + + +/* + bigint bigint char(1) vector_type + scn vid type vector +*/ +int ObVecIndexBuilderUtil::set_vec_index_id_table_columns( + const ObCreateIndexArg &arg, + const ObTableSchema &data_schema, + ObTableSchema &index_schema) +{ + int ret = OB_SUCCESS; + if (!data_schema.is_valid() || + (!share::schema::is_vec_index_id_type(arg.index_type_)) || + arg.index_columns_.count() != 3 || /* scn, vid, type column */ + arg.store_columns_.count() != 1) { /* vector column */ + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), K(arg.index_type_), + K(arg.index_columns_.count()), K(arg.store_columns_.count()), + K(arg.index_columns_), K(arg.store_columns_)); + } + HEAP_VAR(ObRowDesc, row_desc) { + // 1. 
add index_id_table scn, vid column + for (int64_t i = 0; OB_SUCC(ret) && i < arg.index_columns_.count(); ++i) { + const ObColumnSchemaV2 *vec_column = nullptr; + const ObColumnSortItem &vec_col_item = arg.index_columns_.at(i); + const ObString &vec_col_name = vec_col_item.column_name_; + if (vec_col_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(vec_col_name)); + } else if (OB_ISNULL(vec_column = data_schema.get_column_schema(vec_col_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, + vec_col_name.length(), vec_col_name.ptr()); + LOG_WARN("get_column_schema failed", + "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", vec_col_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(vec_column, + true/*is_index_column*/, + true/*is_rowkey*/, + arg.index_columns_.at(i).order_type_, + row_desc, + index_schema, + false/*is_hidden*/, + false/*is_specified_storing_col*/))) { + LOG_WARN("add column failed", "vec_column", *vec_column, + "rowkey_order_type", arg.index_columns_.at(i).order_type_, + K(row_desc), K(ret)); + } + } + if (OB_SUCC(ret)) { + index_schema.set_rowkey_column_num(row_desc.get_column_num()); + index_schema.set_index_column_num(row_desc.get_column_num()); + } + // 2. 
add index_id_table vector column + for (int64_t i = 0; OB_SUCC(ret) && i < arg.store_columns_.count(); ++i) { + const ObColumnSchemaV2 *store_column = nullptr; + const ObString &store_column_name = arg.store_columns_.at(i); + // is_rowkey is false, order_in_rowkey will not be used + const ObOrderType order_in_rowkey = ObOrderType::DESC; + if (OB_UNLIKELY(store_column_name.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(store_column_name)); + } else if (OB_ISNULL(store_column = data_schema.get_column_schema(store_column_name))) { + ret = OB_ERR_BAD_FIELD_ERROR; + LOG_WARN("get_column_schema failed", "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", store_column_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(store_column, + false/*is_index_column*/, + false/*is_rowkey*/, + order_in_rowkey, + row_desc, + index_schema, + false/*is_hidden*/, + true/*is_specified_storing_col*/))) { + LOG_WARN("add_column failed", K(store_column), K(row_desc), K(ret)); + } + } + // 3. 
add part key column + if (OB_FAIL(ret)) { + } else if (OB_FAIL(set_part_key_columns(data_schema, index_schema))) { + LOG_WARN("fail to generate part key columns", K(ret)); + } + // + if (FAILEDx(index_schema.sort_column_array_by_column_id())) { + LOG_WARN("failed to sort column", K(ret)); + } else { + LOG_INFO("succeed to set vec index id table columns", K(index_schema)); + } + } + LOG_DEBUG("finish set vec index id table column", K(ret), K(arg), K(index_schema), K(data_schema)); + return ret; +} + + +/* + varchar blob + key data +*/ +int ObVecIndexBuilderUtil::set_vec_index_snapshot_data_table_columns( + const ObCreateIndexArg &arg, + const ObTableSchema &data_schema, + ObTableSchema &index_schema) +{ + int ret = OB_SUCCESS; + if (!data_schema.is_valid() || + (!share::schema::is_vec_index_snapshot_data_type(arg.index_type_)) || + arg.index_columns_.count() != 1 || /* key column */ + arg.store_columns_.count() != 3) { /* data , vid, vector column */ + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), K(arg.index_type_), + K(arg.index_columns_.count()), K(arg.store_columns_.count()), + K(arg.index_columns_), K(arg.store_columns_)); + } + HEAP_VAR(ObRowDesc, row_desc) { + // 1. 
add index_snapshot_data_table key column + for (int64_t i = 0; OB_SUCC(ret) && i < arg.index_columns_.count(); ++i) { + const ObColumnSchemaV2 *vec_column = nullptr; + const ObColumnSortItem &vec_col_item = arg.index_columns_.at(i); + const ObString &vec_col_name = vec_col_item.column_name_; + if (vec_col_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(vec_col_name)); + } else if (OB_ISNULL(vec_column = data_schema.get_column_schema(vec_col_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, + vec_col_name.length(), vec_col_name.ptr()); + LOG_WARN("get_column_schema failed", + "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", vec_col_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(vec_column, + true/*is_index_column*/, + true/*is_rowkey*/, + arg.index_columns_.at(i).order_type_, + row_desc, + index_schema, + false/*is_hidden*/, + false/*is_specified_storing_col*/))) { + LOG_WARN("add column failed", "vec_column", *vec_column, + "rowkey_order_type", arg.index_columns_.at(i).order_type_, + K(row_desc), K(ret)); + } + } + if (OB_SUCC(ret)) { + index_schema.set_rowkey_column_num(row_desc.get_column_num()); + index_schema.set_index_column_num(row_desc.get_column_num()); + } + // 2. 
add index_snapshot_data_table data column + for (int64_t i = 0; OB_SUCC(ret) && i < arg.store_columns_.count(); ++i) { + const ObColumnSchemaV2 *store_column = nullptr; + const ObString &store_column_name = arg.store_columns_.at(i); + // is_rowkey is false, order_in_rowkey will not be used + const ObOrderType order_in_rowkey = ObOrderType::DESC; + if (OB_UNLIKELY(store_column_name.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(store_column_name)); + } else if (OB_ISNULL(store_column = data_schema.get_column_schema(store_column_name))) { + ret = OB_ERR_BAD_FIELD_ERROR; + LOG_WARN("get_column_schema failed", "tenant_id", data_schema.get_tenant_id(), + "database_id", data_schema.get_database_id(), + "table_name", data_schema.get_table_name(), + "column name", store_column_name, K(ret)); + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(store_column, + false/*is_index_column*/, + false/*is_rowkey*/, + order_in_rowkey, + row_desc, + index_schema, + false/*is_hidden*/, + true/*is_specified_storing_col*/))) { + LOG_WARN("add_column failed", K(store_column), K(row_desc), K(ret)); + } + } + if (FAILEDx(index_schema.sort_column_array_by_column_id())) { + LOG_WARN("failed to sort column", K(ret)); + } else { + LOG_INFO("succeed to set vec index table columns", K(index_schema)); + } + } + LOG_DEBUG("finish set vec index snapshot data table column", K(ret), K(arg), K(index_schema), K(data_schema)); + return ret; +} + +/* + * 1. 生成辅助表的列 + 2. 
把辅助表的对应的列放入index_arg (主键放入index_column,非主键放入store_column) +*/ +int ObVecIndexBuilderUtil::adjust_vec_args( + obrpc::ObCreateIndexArg &index_arg, + ObTableSchema &data_schema, // not const since will add column to data schema + ObIAllocator &allocator, + ObIArray &gen_columns) +{ + int ret = OB_SUCCESS; + const ObIndexType &index_type = index_arg.index_type_; + + uint64_t vid_col_id = OB_INVALID_ID; + uint64_t type_col_id = OB_INVALID_ID; + uint64_t vector_col_id = OB_INVALID_ID; + uint64_t scn_col_id = OB_INVALID_ID; + uint64_t key_col_id = OB_INVALID_ID; + uint64_t data_col_id = OB_INVALID_ID; + + const ObColumnSchemaV2 *existing_vid_col = nullptr; + const ObColumnSchemaV2 *existing_type_col = nullptr; + const ObColumnSchemaV2 *existing_vector_col = nullptr; + const ObColumnSchemaV2 *existing_scn_col = nullptr; + const ObColumnSchemaV2 *existing_key_col = nullptr; + const ObColumnSchemaV2 *existing_data_col = nullptr; + + ObArray tmp_cols; + uint64_t available_col_id = 0; + bool is_rowkey_vid = false; + bool is_vid_rowkey = false; + bool is_delta_buffer = false; + bool is_index_id = false; + bool is_index_snapshot_data = false; + + if (!data_schema.is_valid() || !share::schema::is_vec_index(index_type)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), K(index_type)); + } else if (FALSE_IT(available_col_id = data_schema.get_max_used_column_id() + 1)) { + } else if (FALSE_IT(is_rowkey_vid = share::schema::is_vec_rowkey_vid_type(index_type))) { + } else if (FALSE_IT(is_vid_rowkey = share::schema::is_vec_vid_rowkey_type(index_type))) { + } else if (FALSE_IT(is_delta_buffer = share::schema::is_vec_delta_buffer_type(index_type))) { + } else if (FALSE_IT(is_index_id = share::schema::is_vec_index_id_type(index_type))) { + } else if (FALSE_IT(is_index_snapshot_data = share::schema::is_vec_index_snapshot_data_type(index_type))) { + } else if (OB_FAIL(check_vec_cols(&index_arg, data_schema))) { + LOG_WARN("check cols check failed", 
K(ret)); + } else if (OB_FAIL(get_vec_vid_col(data_schema, existing_vid_col))) { + LOG_WARN("failed to get vid id col", K(ret)); + } else if (OB_FAIL(get_vec_type_col(data_schema, &index_arg, existing_type_col))) { + LOG_WARN("failed to get vec type col", K(ret)); + } else if (OB_FAIL(get_vec_vector_col(data_schema, &index_arg, existing_vector_col))) { + LOG_WARN("fail to get vec vector column", K(ret)); + } else if (OB_FAIL(get_vec_scn_col(data_schema, &index_arg, existing_scn_col))) { + LOG_WARN("failed to get vec scn col", K(ret)); + } else if (OB_FAIL(get_vec_key_col(data_schema, &index_arg, existing_key_col))) { + LOG_WARN("failed to get vec key col", K(ret)); + } else if (OB_FAIL(get_vec_data_col(data_schema, &index_arg, existing_data_col))) { + LOG_WARN("failed to get vec data col", K(ret)); + } else { + ObColumnSchemaV2 *generated_vid_col = nullptr; + ObColumnSchemaV2 *generated_type_col = nullptr; + ObColumnSchemaV2 *generated_vector_col = nullptr; + ObColumnSchemaV2 *generated_scn_col = nullptr; + ObColumnSchemaV2 *generated_key_col = nullptr; + ObColumnSchemaV2 *generated_data_col = nullptr; + if (OB_ISNULL(existing_vid_col)) { // need to generate vid column + vid_col_id = available_col_id++; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(generate_vid_column(&index_arg, vid_col_id, data_schema, generated_vid_col))) { + LOG_WARN("failed to generate vid column", K(ret)); + } else if (OB_FAIL(gen_columns.push_back(generated_vid_col))) { + LOG_WARN("failed to push back vid column", K(ret)); + } + } + if (is_rowkey_vid || is_vid_rowkey) { + } else if (is_delta_buffer || is_index_id || is_index_snapshot_data) { + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(existing_type_col)) { + type_col_id = available_col_id++; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(generate_type_column(&index_arg, type_col_id, data_schema, generated_type_col))) { + LOG_WARN("failed to generate type column", K(ret)); + } else if (OB_FAIL(gen_columns.push_back(generated_type_col))) { + 
LOG_WARN("failed to push type column", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(existing_vector_col)) { + vector_col_id = available_col_id++; + if (OB_FAIL(generate_vector_column(&index_arg, vector_col_id, data_schema, generated_vector_col))) { + LOG_WARN("failed to generate vector column", K(ret)); + } else if (OB_FAIL(gen_columns.push_back(generated_vector_col))) { + LOG_WARN("failed to push back vector column", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(existing_scn_col)) { + scn_col_id = available_col_id++; + if (OB_FAIL(generate_scn_column(&index_arg, scn_col_id, data_schema, generated_scn_col))) { + LOG_WARN("fail to generate scn column", K(ret)); + } else if (OB_FAIL(gen_columns.push_back(generated_scn_col))) { + LOG_WARN("fail to push back generated scn column", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(existing_key_col)) { + key_col_id = available_col_id++; + if (OB_FAIL(generate_key_column(&index_arg, key_col_id, data_schema, generated_key_col))) { + LOG_WARN("fail to generate key column", K(ret)); + } else if (OB_FAIL(gen_columns.push_back(generated_key_col))) { + LOG_WARN("fail to push back generated key column", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(existing_data_col)) { + data_col_id = available_col_id++; + if (OB_FAIL(generate_data_column(&index_arg, data_col_id, data_schema, generated_data_col))) { + LOG_WARN("fail to generate data column", K(ret)); + } else if (OB_FAIL(gen_columns.push_back(generated_data_col))) { + LOG_WARN("fail to push back generated data column", K(ret)); + } + } + } + if (OB_FAIL(ret)) { + } else if (is_rowkey_vid || is_vid_rowkey) { + if (OB_FAIL(push_back_gen_col(tmp_cols, existing_vid_col, generated_vid_col))) { + LOG_WARN("failed to push back vid column", K(ret)); + } else if (OB_FAIL(adjust_vec_arg(&index_arg, data_schema, allocator, tmp_cols))) { + LOG_WARN("failed to append vec index arg", K(ret)); + } + } else if (is_delta_buffer) { + 
if (OB_FAIL(push_back_gen_col(tmp_cols, existing_vid_col, generated_vid_col))) { + LOG_WARN("failed to push back vid col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, existing_type_col, generated_type_col))) { + LOG_WARN("failed to push back type col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, existing_vector_col, generated_vector_col))) { + LOG_WARN("failed to push back vector col", K(ret)); + } else if (OB_FAIL(adjust_vec_arg(&index_arg, data_schema, allocator, tmp_cols))) { + LOG_WARN("failed to append vec index arg", K(ret)); + } + } else if (is_index_id) { + if (OB_FAIL(push_back_gen_col(tmp_cols, existing_scn_col, generated_scn_col))) { + LOG_WARN("failed to push back scn col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, existing_vid_col, generated_vid_col))) { + LOG_WARN("failed to push back vid col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, existing_type_col, generated_type_col))) { + LOG_WARN("failed to push back type col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, existing_vector_col, generated_vector_col))) { + LOG_WARN("fail to push back vector col", K(ret)); + } else if (OB_FAIL(adjust_vec_arg(&index_arg, data_schema, allocator, tmp_cols))) { + LOG_WARN("failed to append vec index arg", K(ret)); + } + } else if (is_index_snapshot_data) { + if (OB_FAIL(push_back_gen_col(tmp_cols, existing_key_col, generated_key_col))) { + LOG_WARN("failed to push back key col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, existing_data_col, generated_data_col))) { + LOG_WARN("failed to push back data col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, existing_vid_col, generated_vid_col))) { + LOG_WARN("failed to push back vid col", K(ret)); + } else if (OB_FAIL(push_back_gen_col(tmp_cols, existing_vector_col, generated_vector_col))) { + LOG_WARN("failed to push back vector col", K(ret)); + } else if (OB_FAIL(adjust_vec_arg(&index_arg, data_schema, allocator, 
tmp_cols))) { + LOG_WARN("failed to append vec index arg", K(ret)); + } + } + } + return ret; +} + + +int ObVecIndexBuilderUtil::adjust_vec_arg( + ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + ObIAllocator &allocator, + const ObIArray &vec_cols) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_) || + !data_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema)); + } else { + const ObIndexType &index_type = index_arg->index_type_; + const bool is_vec_rowkey_vid = share::schema::is_vec_rowkey_vid_type(index_arg->index_type_); + const bool is_vec_vid_rowkey = share::schema::is_vec_vid_rowkey_type(index_arg->index_type_); + const bool is_vec_delta_buffer = share::schema::is_vec_delta_buffer_type(index_arg->index_type_); + const bool is_vec_index_id = share::schema::is_vec_index_id_type(index_arg->index_type_); + const bool is_vec_index_snapshot_data = share::schema::is_vec_index_snapshot_data_type(index_arg->index_type_); + + if ((is_vec_rowkey_vid && vec_cols.count() != 1) || /* rowkey_vid_table 的生成列数,由于不需要生成主表主键列,因此只有1列 */ + (is_vec_vid_rowkey && vec_cols.count() != 1) || /* vid_rowkey_table 的生成列数,由于不需要生成主表主键列,因此只有1列*/ + (is_vec_delta_buffer && vec_cols.count() != 3) || /* delta_buffer_table 的生成列数,不算伪列,共3列 */ + (is_vec_index_id && vec_cols.count() != 4) || /* index_table_id 的生成列数,共4列 */ + (is_vec_index_snapshot_data && vec_cols.count() != 4) ) { /* index_snapshot_data_table 的生成列数,共2列*/ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("vec cols count not expected", K(ret), K(index_type), K(vec_cols)); + } else { + index_arg->index_columns_.reuse(); + index_arg->store_columns_.reuse(); + if (is_vec_rowkey_vid) { + // 1. 
add rowkey column to arg->index_columns + const ObRowkeyInfo &rowkey_info = data_schema.get_rowkey_info(); + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_info.get_size(); ++i) { + ObColumnSortItem rowkey_column; + const ObColumnSchemaV2 *rowkey_col = NULL; + uint64_t column_id = OB_INVALID_ID; + if (OB_FAIL(rowkey_info.get_column_id(i, column_id))) { + LOG_WARN("get_column_id failed", "index", i, K(ret)); + } else if (NULL == (rowkey_col = data_schema.get_column_schema(column_id))) { + ret = OB_ERR_BAD_FIELD_ERROR; + LOG_WARN("get_column_schema failed", "table_id", + data_schema.get_table_id(), K(column_id), K(ret)); + } else if (OB_FAIL(ob_write_string(allocator, + rowkey_col->get_column_name_str(), + rowkey_column.column_name_))) { + //to keep the memory lifetime of column_name consistent with index_arg + LOG_WARN("deep copy column name failed", K(ret)); + } else if (OB_FAIL(index_arg->index_columns_.push_back(rowkey_column))) { + LOG_WARN("failed to push back rowkey column", K(ret)); + } + } + // 2. 
add vid column to arg->store_columns + const ObColumnSchemaV2 *vid_col = vec_cols.at(0); + ObString vid_col_name; + if (FAILEDx(ob_write_string(allocator, vid_col->get_column_name_str(), vid_col_name))) { + LOG_WARN("fail to deep copy vid id column name", K(ret)); + } else if (OB_FAIL(index_arg->store_columns_.push_back(vid_col_name))) { + LOG_WARN("failed to push back vid id column", K(ret)); + } + } else if (is_vec_vid_rowkey) { + // add vid column to index_columns + ObColumnSortItem vid_column; + const ObColumnSchemaV2 *vid_col = vec_cols.at(0); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(vid_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("vec col is null", K(ret)); + } else if (OB_FAIL(ob_write_string(allocator, + vid_col->get_column_name_str(), + vid_column.column_name_))) { + //to keep the memory lifetime of column_name consistent with index_arg + LOG_WARN("deep copy column name failed", K(ret)); + } else if (OB_FAIL(index_arg->index_columns_.push_back(vid_column))) { + LOG_WARN("failed to push back vid id column", K(ret)); + } + + } else if (is_vec_delta_buffer) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(inner_adjust_vec_arg(index_arg, + vec_cols, + OB_VEC_DELTA_BUFFER_TABLE_INDEX_COL_CNT, + &allocator))) { + LOG_WARN("failed to inner_adjust_vec_arg", K(ret)); + } + } else if (is_vec_index_id) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(inner_adjust_vec_arg(index_arg, + vec_cols, + OB_VEC_INDEX_ID_TABLE_INDEX_COL_CNT, + &allocator))) { + LOG_WARN("failed to inner_adjust_vec_arg", K(ret)); + } + } else if (is_vec_index_snapshot_data) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(inner_adjust_vec_arg(index_arg, + vec_cols, + OB_VEC_INDEX_SNAPSHOT_DATA_TABLE_INDEX_COL_CNT, + &allocator))) { + LOG_WARN("failed to inner_adjust_vec_arg", K(ret)); + } + } + } + } + return ret; +} + +int ObVecIndexBuilderUtil::inner_adjust_vec_arg( + obrpc::ObCreateIndexArg *vec_arg, + const ObIArray &vec_cols, + const int index_column_cnt, // 辅助表的主键列数 + ObIAllocator *allocator) 
+{ + int ret = OB_SUCCESS; + if (OB_ISNULL(vec_arg) || OB_ISNULL(allocator) || + (!share::schema::is_vec_delta_buffer_type(vec_arg->index_type_) && + !share::schema::is_vec_index_id_type(vec_arg->index_type_) && + !share::schema::is_vec_index_snapshot_data_type(vec_arg->index_type_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid argument", K(ret), KPC(vec_arg), KP(allocator)); + } else if ((share::schema::is_vec_delta_buffer_type(vec_arg->index_type_) || + share::schema::is_vec_index_id_type(vec_arg->index_type_)) && + vec_cols.count() != index_column_cnt + 1) { // index_rowkey_column_cnt + common_col_cnt。 delta_buffer_table 和 index_id_table 的非主键列为1 + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid argument", K(ret), K(vec_cols.count()), K(index_column_cnt)); + } else if (share::schema::is_vec_index_snapshot_data_type(vec_arg->index_type_) && + vec_cols.count() != index_column_cnt + 3) { // index_rowkey_column_cnt + common_col_cnt , snapshot_data 的非主键列为3 + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid argument", K(ret), K(vec_cols.count()), K(index_column_cnt)); + } else { + // 1. add assistant table rowkey column to arg->index_columns + for (int64_t i = 0; OB_SUCC(ret) && i < index_column_cnt; ++i) { + ObColumnSortItem vec_column; + const ObColumnSchemaV2 *vec_col = vec_cols.at(i); + if (OB_ISNULL(vec_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("vec_col is null", K(ret), K(i)); + } else if (OB_FAIL(ob_write_string(*allocator, + vec_col->get_column_name_str(), + vec_column.column_name_))) { + //to keep the memory lifetime of column_name consistent with index_arg + LOG_WARN("deep copy column name failed", K(ret)); + } else if (OB_FAIL(vec_arg->index_columns_.push_back(vec_column))) { + LOG_WARN("failed to push back index column", K(ret)); + } + } + // 2. 
add none assistant table none rowkey column to arg->store_columns + for (int64_t i = index_column_cnt; i < vec_cols.count(); ++i) { + const ObColumnSchemaV2 *other_col = vec_cols.at(i); + ObString other_col_name; + if (FAILEDx(ob_write_string(*allocator, other_col->get_column_name_str(), other_col_name))) { + LOG_WARN("fail to deep copy other column name", K(ret)); + } else if (OB_FAIL(vec_arg->store_columns_.push_back(other_col_name))) { + LOG_WARN("failed to push back other column", K(ret)); + } + } + } + return ret; +} + + +int ObVecIndexBuilderUtil::push_back_gen_col( + ObIArray &cols, + const ObColumnSchemaV2 *existing_col, + ObColumnSchemaV2 *generated_col) +{ + int ret = OB_SUCCESS; + if (OB_NOT_NULL(existing_col)) { + if (OB_FAIL(cols.push_back(existing_col))) { + LOG_WARN("failed to push back existing col", K(ret)); + } + } else { + if (OB_ISNULL(generated_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("generated col is nullptr", K(ret)); + } else if (OB_FAIL(cols.push_back(generated_col))) { + LOG_WARN("failed to push back generated col", K(ret)); + } + } + return ret; +} + +int ObVecIndexBuilderUtil::generate_vid_column( + const ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, + ObColumnSchemaV2 *&vid_col) +{ + int ret = OB_SUCCESS; + vid_col = nullptr; + char col_name_buf[OB_MAX_COLUMN_NAME_LENGTH] = {'\0'}; + int64_t name_pos = 0; + bool col_exists = false; + if (OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_) || + !data_schema.is_valid() || + col_id == OB_INVALID_ID) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), K(col_id)); + } else if (OB_FAIL(construct_vid_col_name(col_name_buf, OB_MAX_COLUMN_NAME_LENGTH, name_pos))) { + LOG_WARN("failed to construct vid column name", K(ret)); + } else if (OB_FAIL(check_vec_gen_col(data_schema, col_id, col_name_buf, name_pos, col_exists))) { + LOG_WARN("check vid column failed", K(ret)); + } else 
if (!col_exists) { + const ObRowkeyInfo &rowkey_info = data_schema.get_rowkey_info(); + const ObColumnSchemaV2 *col_schema = nullptr; + SMART_VAR(char[OB_MAX_DEFAULT_VALUE_LENGTH], vec_expr_def) { + MEMSET(vec_expr_def, 0, sizeof(vec_expr_def)); + int64_t def_pos = 0; + if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, "VEC_VID()"))) { + LOG_WARN("print generate expr definition prefix failed", K(ret)); + } else { + ObColumnSchemaV2 column_schema; + ObObj default_value; + default_value.set_varchar(vec_expr_def, static_cast(def_pos)); + column_schema.set_rowkey_position(0); //非主键列 + column_schema.set_index_position(0); //非索引列 + column_schema.set_tbl_part_key_pos(0); //非partition key + column_schema.set_tenant_id(data_schema.get_tenant_id()); + column_schema.set_table_id(data_schema.get_table_id()); + column_schema.set_column_id(col_id); + column_schema.add_column_flag(GENERATED_VEC_VID_COLUMN_FLAG); + column_schema.add_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + column_schema.set_is_hidden(true); + column_schema.set_nullable(false); + column_schema.set_data_type(ObIntType); + column_schema.set_data_length(0); + column_schema.set_collation_type(CS_TYPE_BINARY); + column_schema.set_prev_column_id(UINT64_MAX); + column_schema.set_next_column_id(UINT64_MAX); + if (OB_FAIL(column_schema.set_column_name(col_name_buf))) { + LOG_WARN("set column name failed", K(ret)); + } else if (OB_FAIL(column_schema.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(column_schema.set_cur_default_value(default_value, column_schema.is_default_expr_v2_column()))) { + LOG_WARN("set current default value failed", K(ret)); + } else if (OB_FAIL(data_schema.add_column(column_schema))) { + LOG_WARN("add column schema to data table failed", K(ret)); + } else { + vid_col = data_schema.get_column_schema(column_schema.get_column_id()); + if (OB_ISNULL(vid_col)) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("generate vid column schema failed", K(ret), KP(vid_col)); + } else { + LOG_INFO("succeed to generate vid column schema", KCSTRING(col_name_buf), K(col_id), K(data_schema)); + } + } + } + } + } + return ret; +} + + +int ObVecIndexBuilderUtil::generate_type_column( + const ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, + ObColumnSchemaV2 *&type_col) +{ + int ret = OB_SUCCESS; + type_col = nullptr; + char col_name_buf[OB_MAX_COLUMN_NAME_LENGTH] = {'\0'}; + int64_t name_pos = 0; + bool col_exists = false; + if (OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_) || + !data_schema.is_valid() || + col_id == OB_INVALID_ID ) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), K(col_id)); + } else if (OB_FAIL(construct_type_col_name(index_arg, data_schema, col_name_buf, + OB_MAX_COLUMN_NAME_LENGTH, name_pos))) { + LOG_WARN("failed to construct type col name", K(ret)); + } else if (OB_FAIL(check_vec_gen_col(data_schema, col_id, col_name_buf, name_pos, col_exists))) { + LOG_WARN("check vec gen col failed", K(ret)); + } else if (!col_exists) { + ObColumnSchemaV2 column_schema; + SMART_VAR(char[OB_MAX_DEFAULT_VALUE_LENGTH], vec_expr_def) { + ObArray extend_type_info; + MEMSET(vec_expr_def, 0, sizeof(vec_expr_def)); + int64_t def_pos = 0; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, + "VEC_TYPE("))) { + LOG_WARN("print generate expr definition prefix failed", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + ObColumnSchemaV2 *col_schema = nullptr; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = 
OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(column_schema.add_cascaded_column_id(col_schema->get_column_id()))) { + LOG_WARN("add cascaded column to generated column failed", K(ret)); + } else if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, + "`%s`, ", + col_schema->get_column_name()))) { + LOG_WARN("print column name to buffer failed", K(ret)); + } else if (OB_FAIL(extend_type_info.assign(col_schema->get_extended_type_info()))) { + LOG_WARN("fail to assign extend type info"); + } + } + if (OB_FAIL(ret)) { + } else { + def_pos -= 2; // remove last ", " + if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, ")"))) { + LOG_WARN("print generate expr definition suffix failed", K(ret)); + } else { + ObObj default_value; + default_value.set_varchar(vec_expr_def, static_cast(def_pos)); + column_schema.set_rowkey_position(0); //非主键列 + column_schema.set_index_position(0); //非索引列 + column_schema.set_tbl_part_key_pos(0); //非partition key + column_schema.set_tenant_id(data_schema.get_tenant_id()); + column_schema.set_table_id(data_schema.get_table_id()); + column_schema.set_column_id(col_id); + column_schema.add_column_flag(GENERATED_VEC_TYPE_COLUMN_FLAG); + column_schema.add_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + column_schema.set_is_hidden(true); + column_schema.set_data_type(ObCharType); // char(1) + column_schema.set_data_length(1); + column_schema.set_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI); + column_schema.set_prev_column_id(UINT64_MAX); + column_schema.set_next_column_id(UINT64_MAX); + if (OB_FAIL(column_schema.set_column_name(col_name_buf))) { + LOG_WARN("set column name failed", K(ret)); + } else if (OB_FAIL(column_schema.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(column_schema.set_cur_default_value(default_value, 
column_schema.is_default_expr_v2_column()))) { + LOG_WARN("set current default value failed", K(ret)); + } else if (OB_FAIL(data_schema.add_column(column_schema))) { + LOG_WARN("add column schema to data table failed", K(ret)); + } else { + type_col = data_schema.get_column_schema(column_schema.get_column_id()); + if (OB_ISNULL(type_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("generate type column failed", K(ret), KP(type_col)); + } else { + LOG_INFO("succeed to generate type column", KCSTRING(col_name_buf), K(col_id), K(data_schema)); + } + } + } + } + } + } + return ret; +} + +int ObVecIndexBuilderUtil::generate_vector_column( + const ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, + ObColumnSchemaV2 *&vector_col) +{ + int ret = OB_SUCCESS; + vector_col = nullptr; + char col_name_buf[OB_MAX_COLUMN_NAME_LENGTH] = {'\0'}; + int64_t name_pos = 0; + bool col_exists = false; + if (OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_) || + !data_schema.is_valid() || + col_id == OB_INVALID_ID ) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), K(col_id)); + } else if (OB_FAIL(construct_vector_col_name(index_arg, data_schema, col_name_buf, + OB_MAX_COLUMN_NAME_LENGTH, name_pos))) { + LOG_WARN("failed to construct vector column name", K(ret)); + } else if (OB_FAIL(check_vec_gen_col(data_schema, col_id, col_name_buf, name_pos, col_exists))) { + LOG_WARN("check vec gen column failed", K(ret)); + } else if (!col_exists) { + ObColumnSchemaV2 column_schema; + ObArray extend_type_info; + SMART_VAR(char[OB_MAX_DEFAULT_VALUE_LENGTH], vec_expr_def) { + MEMSET(vec_expr_def, 0, sizeof(vec_expr_def)); + int64_t def_pos = 0; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, + "VEC_VECTOR("))) { + LOG_WARN("print generate expr definition prefix failed", K(ret)); + } + // 这里的 index_arg->index_columns_ 
包含了向量索引列,目前仅支持单列 + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + ObColumnSchemaV2 *col_schema = nullptr; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(column_schema.add_cascaded_column_id(col_schema->get_column_id()))) { + LOG_WARN("add cascaded column to generated column failed", K(ret)); + } else if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, + "`%s`, ", + col_schema->get_column_name()))) { + LOG_WARN("print column name to buffer failed", K(ret)); + } else if (OB_FAIL(extend_type_info.assign(col_schema->get_extended_type_info()))) { + LOG_WARN("fail to assign extend type info", K(ret), KPC(col_schema)); + } + } + if (OB_FAIL(ret)) { + } else { + def_pos -= 2; // remove last ", " + if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, ")"))) { + LOG_WARN("print generate expr definition suffix failed", K(ret)); + } else { + ObObj default_value; + default_value.set_varchar(vec_expr_def, static_cast(def_pos)); + column_schema.set_rowkey_position(0); //非主键列 + column_schema.set_index_position(0); //非索引列 + column_schema.set_tbl_part_key_pos(0); //非partition key + column_schema.set_tenant_id(data_schema.get_tenant_id()); + column_schema.set_table_id(data_schema.get_table_id()); + column_schema.set_column_id(col_id); + column_schema.add_column_flag(GENERATED_VEC_VECTOR_COLUMN_FLAG); + column_schema.add_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + column_schema.set_is_hidden(true); + column_schema.set_data_type(ObCollectionSQLType); // vector type + column_schema.set_data_length(0); + 
column_schema.set_collation_type(CS_TYPE_BINARY); + column_schema.set_prev_column_id(UINT64_MAX); + column_schema.set_next_column_id(UINT64_MAX); + column_schema.set_nullable(true); + if (OB_FAIL(column_schema.set_extended_type_info(extend_type_info))) { + LOG_WARN("fail to set extend type info", K(ret), K(extend_type_info)); + } else if (OB_FAIL(column_schema.set_column_name(col_name_buf))) { + LOG_WARN("set column name failed", K(ret)); + } else if (OB_FAIL(column_schema.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(column_schema.set_cur_default_value(default_value, column_schema.is_default_expr_v2_column()))) { + LOG_WARN("set current default value failed", K(ret)); + } else if (OB_FAIL(data_schema.add_column(column_schema))) { + LOG_WARN("add column schema to data table failed", K(ret)); + } else { + vector_col = data_schema.get_column_schema(column_schema.get_column_id()); + if (OB_ISNULL(vector_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("generate vector column failed", K(ret), KP(vector_col)); + } else { + LOG_INFO("succeed to generate vector column", KCSTRING(col_name_buf), K(col_id), K(data_schema)); + } + } + } + } + } + } + return ret; +} + +int ObVecIndexBuilderUtil::generate_scn_column( + const ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, // not const since will add column to data schema + ObColumnSchemaV2 *&scn_col) +{ + int ret = OB_SUCCESS; + scn_col = nullptr; + char col_name_buf[OB_MAX_COLUMN_NAME_LENGTH] = {'\0'}; + int64_t name_pos = 0; + bool col_exists = false; + if (OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_) || + !data_schema.is_valid() || + col_id == OB_INVALID_ID ) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), K(col_id)); + } else if (OB_FAIL(construct_scn_col_name(index_arg, data_schema, col_name_buf, + OB_MAX_COLUMN_NAME_LENGTH, 
name_pos))) { + LOG_WARN("failed to construct scn column name", K(ret)); + } else if (OB_FAIL(check_vec_gen_col(data_schema, col_id, col_name_buf, name_pos, col_exists))) { + LOG_WARN("check scn column failed", K(ret)); + } else if (!col_exists) { + ObColumnSchemaV2 column_schema; + SMART_VAR(char[OB_MAX_DEFAULT_VALUE_LENGTH], vec_expr_def) { + MEMSET(vec_expr_def, 0, sizeof(vec_expr_def)); + int64_t def_pos = 0; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, + "VEC_SCN("))) { + LOG_WARN("print generate expr definition prefix failed", K(ret)); + } + // 这里的 index_arg->index_columns_ 包含了向量索引列,目前仅支持单列 + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + ObColumnSchemaV2 *col_schema = nullptr; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(column_schema.add_cascaded_column_id(col_schema->get_column_id()))) { + LOG_WARN("add cascaded column to generated column failed", K(ret)); + } else if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, + "`%s`, ", + col_schema->get_column_name()))) { + LOG_WARN("print column name to buffer failed", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else { + def_pos -= 2; // remove last ", " + if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, ")"))) { + LOG_WARN("print generate expr definition suffix failed", K(ret)); + } else { + ObObj default_value; + default_value.set_varchar(vec_expr_def, static_cast(def_pos)); + column_schema.set_rowkey_position(0); //非主键列 + column_schema.set_index_position(0); //非索引列 + 
column_schema.set_tbl_part_key_pos(0); //非partition key + column_schema.set_tenant_id(data_schema.get_tenant_id()); + column_schema.set_table_id(data_schema.get_table_id()); + column_schema.set_column_id(col_id); + column_schema.add_column_flag(GENERATED_VEC_SCN_COLUMN_FLAG); + column_schema.add_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + column_schema.set_is_hidden(true); + column_schema.set_data_type(ObIntType); // bigint + column_schema.set_data_length(0); // TODO@xiain: what length ? + column_schema.set_collation_type(CS_TYPE_BINARY); + column_schema.set_prev_column_id(UINT64_MAX); + column_schema.set_next_column_id(UINT64_MAX); + column_schema.set_nullable(true); + if (OB_FAIL(column_schema.set_column_name(col_name_buf))) { + LOG_WARN("set column name failed", K(ret)); + } else if (OB_FAIL(column_schema.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(column_schema.set_cur_default_value(default_value, column_schema.is_default_expr_v2_column()))) { + LOG_WARN("set current default value failed", K(ret)); + } else if (OB_FAIL(data_schema.add_column(column_schema))) { + LOG_WARN("add column schema to data table failed", K(ret)); + } else { + scn_col = data_schema.get_column_schema(column_schema.get_column_id()); + if (OB_ISNULL(scn_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("generate scn column failed", K(ret), KP(scn_col)); + } else { + LOG_INFO("succeed to generate scn column", KCSTRING(col_name_buf), K(col_id), K(data_schema)); + } + } + } + } + } + } + return ret; +} + + +int ObVecIndexBuilderUtil::generate_key_column( + const ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, // not const since will add column to data schema + ObColumnSchemaV2 *&key_col) +{ + int ret = OB_SUCCESS; + key_col = nullptr; + char col_name_buf[OB_MAX_COLUMN_NAME_LENGTH] = {'\0'}; + int64_t name_pos = 0; + bool col_exists = false; + if (OB_ISNULL(index_arg) || + 
!share::schema::is_vec_index(index_arg->index_type_) || + !data_schema.is_valid() || + col_id == OB_INVALID_ID ) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), K(col_id)); + } else if (OB_FAIL(construct_key_col_name(index_arg, data_schema, col_name_buf, + OB_MAX_COLUMN_NAME_LENGTH, name_pos))) { + LOG_WARN("failed to construct key col name", K(ret)); + } else if (OB_FAIL(check_vec_gen_col(data_schema, col_id, col_name_buf, name_pos, col_exists))) { + LOG_WARN("check key col failed", K(ret)); + } else if (!col_exists) { + ObColumnSchemaV2 column_schema; + SMART_VAR(char[OB_MAX_DEFAULT_VALUE_LENGTH], vec_expr_def) { + MEMSET(vec_expr_def, 0, sizeof(vec_expr_def)); + int64_t def_pos = 0; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, + "VEC_KEY("))) { + LOG_WARN("print generate expr definition prefix failed", K(ret)); + } + // 这里的 index_arg->index_columns_ 包含了向量索引列,目前仅支持单列 + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + ObColumnSchemaV2 *col_schema = nullptr; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(column_schema.add_cascaded_column_id(col_schema->get_column_id()))) { + LOG_WARN("add cascaded column to generated column failed", K(ret)); + } else if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, + "`%s`, ", + col_schema->get_column_name()))) { + LOG_WARN("print column name to buffer failed", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else { + def_pos -= 2; // remove last ", " + if 
(OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, ")"))) { + LOG_WARN("print generate expr definition suffix failed", K(ret)); + } else { + ObObj default_value; + default_value.set_varchar(vec_expr_def, static_cast(def_pos)); + column_schema.set_rowkey_position(0); //非主键列 + column_schema.set_index_position(0); //非索引列 + column_schema.set_tbl_part_key_pos(0); //非partition key + column_schema.set_tenant_id(data_schema.get_tenant_id()); + column_schema.set_table_id(data_schema.get_table_id()); + column_schema.set_column_id(col_id); + column_schema.add_column_flag(GENERATED_VEC_KEY_COLUMN_FLAG); + column_schema.add_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + column_schema.set_is_hidden(true); + column_schema.set_data_type(ObVarcharType); // bigint + column_schema.set_data_length(0); // TODO@xiain: what length is fixed ? + column_schema.set_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI); + column_schema.set_prev_column_id(UINT64_MAX); + column_schema.set_next_column_id(UINT64_MAX); + if (OB_FAIL(column_schema.set_column_name(col_name_buf))) { + LOG_WARN("set column name failed", K(ret)); + } else if (OB_FAIL(column_schema.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(column_schema.set_cur_default_value(default_value, column_schema.is_default_expr_v2_column()))) { + LOG_WARN("set current default value failed", K(ret)); + } else if (OB_FAIL(data_schema.add_column(column_schema))) { + LOG_WARN("add column schema to data table failed", K(ret)); + } else { + key_col = data_schema.get_column_schema(column_schema.get_column_id()); + if (OB_ISNULL(key_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("generate key col failed", K(ret), KP(key_col)); + } else { + LOG_INFO("succeed to generate key column", KCSTRING(col_name_buf), K(col_id), K(data_schema)); + } + } + } + } + } + } + return ret; +} + + +int ObVecIndexBuilderUtil::generate_data_column( + const ObCreateIndexArg *index_arg, + 
const uint64_t col_id, + ObTableSchema &data_schema, // not const since will add column to data schema + ObColumnSchemaV2 *&data_col) +{ + int ret = OB_SUCCESS; + data_col = nullptr; + char col_name_buf[OB_MAX_COLUMN_NAME_LENGTH] = {'\0'}; + int64_t name_pos = 0; + bool col_exists = false; + if (OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_) || + !data_schema.is_valid() || + col_id == OB_INVALID_ID ) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), K(col_id)); + } else if (OB_FAIL(construct_data_col_name(index_arg, data_schema, col_name_buf, + OB_MAX_COLUMN_NAME_LENGTH, name_pos))) { + LOG_WARN("failed to construct data col name", K(ret)); + } else if (OB_FAIL(check_vec_gen_col(data_schema, col_id, col_name_buf, name_pos, col_exists))) { + LOG_WARN("check vec column failed", K(ret)); + } else if (!col_exists) { + ObColumnSchemaV2 column_schema; + SMART_VAR(char[OB_MAX_DEFAULT_VALUE_LENGTH], vec_expr_def) { + MEMSET(vec_expr_def, 0, sizeof(vec_expr_def)); + int64_t def_pos = 0; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, + "VEC_DATA("))) { + LOG_WARN("print generate expr definition prefix failed", K(ret)); + } + // 这里的 index_arg->index_columns_ 包含了向量索引列,目前仅支持单列 + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + ObColumnSchemaV2 *col_schema = nullptr; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(column_schema.add_cascaded_column_id(col_schema->get_column_id()))) { + LOG_WARN("add cascaded column to 
generated column failed", K(ret)); + } else if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, + "`%s`, ", + col_schema->get_column_name()))) { + LOG_WARN("print column name to buffer failed", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else { + def_pos -= 2; // remove last ", " + if (OB_FAIL(databuff_printf(vec_expr_def, OB_MAX_DEFAULT_VALUE_LENGTH, def_pos, ")"))) { + LOG_WARN("print generate expr definition suffix failed", K(ret)); + } else { + ObObj default_value; + default_value.set_varchar(vec_expr_def, static_cast(def_pos)); + column_schema.set_rowkey_position(0); //非主键列 + column_schema.set_index_position(0); //非索引列 + column_schema.set_tbl_part_key_pos(0); //非partition key + column_schema.set_tenant_id(data_schema.get_tenant_id()); + column_schema.set_table_id(data_schema.get_table_id()); + column_schema.set_column_id(col_id); + column_schema.add_column_flag(GENERATED_VEC_DATA_COLUMN_FLAG); + column_schema.add_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + column_schema.set_is_hidden(true); + column_schema.set_data_type(ObLongTextType); // bigint + column_schema.set_data_length(0); // TODO@xiain: what length is fixed ? 
+ column_schema.set_collation_type(CS_TYPE_BINARY); + column_schema.set_prev_column_id(UINT64_MAX); + column_schema.set_next_column_id(UINT64_MAX); + if (OB_FAIL(column_schema.set_column_name(col_name_buf))) { + LOG_WARN("set column name failed", K(ret)); + } else if (OB_FAIL(column_schema.set_orig_default_value(default_value))) { + LOG_WARN("set orig default value failed", K(ret)); + } else if (OB_FAIL(column_schema.set_cur_default_value(default_value, column_schema.is_default_expr_v2_column()))) { + LOG_WARN("set current default value failed", K(ret)); + } else if (OB_FAIL(data_schema.add_column(column_schema))) { + LOG_WARN("add column schema to data table failed", K(ret)); + } else { + data_col = data_schema.get_column_schema(column_schema.get_column_id()); + if (OB_ISNULL(data_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("generate data col failed", K(ret), KP(data_col)); + } else { + LOG_INFO("succeed to generate data column", KCSTRING(col_name_buf), K(col_id), K(data_schema)); + } + } + } + } + } + } + return ret; +} + +int ObVecIndexBuilderUtil::set_part_key_columns( + const ObTableSchema &data_schema, + ObTableSchema &index_schema) +{ + int ret = OB_SUCCESS; + ObTableSchema::const_column_iterator tmp_begin = data_schema.column_begin(); + ObTableSchema::const_column_iterator tmp_end = data_schema.column_end(); + HEAP_VAR(ObRowDesc, row_desc) { + for (; OB_SUCC(ret) && tmp_begin != tmp_end; tmp_begin++) { + ObColumnSchemaV2 *col_schema = (*tmp_begin); + if (OB_ISNULL(col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), KP(col_schema)); + } else if (!col_schema->is_tbl_part_key_column()) { + } else if (is_part_key_column_exist(index_schema, *col_schema)) { + } else if (OB_FAIL(ObIndexBuilderUtil::add_column(col_schema, + false/*is_index_column*/, + false/*is_rowkey*/, + ObOrderType::DESC, + row_desc, + index_schema, + false/*is_hidden*/, + true/*is_specified_storing_col*/))) { + LOG_WARN("add_column failed", K(ret), 
KPC(col_schema), K(index_schema)); + } else { + LOG_INFO("success to add part key column", K(ret), KPC(col_schema)); + } + } + } // row_desc + + return ret; +} + + +bool ObVecIndexBuilderUtil::is_part_key_column_exist( + const ObTableSchema &index_schema, const ObColumnSchemaV2 &part_key_col) +{ + int ret = OB_SUCCESS; + bool is_exists = false; + const ObColumnSchemaV2 *vec_col = nullptr; + const uint64_t col_id = part_key_col.get_column_id(); + if (OB_NOT_NULL(vec_col = index_schema.get_column_schema(col_id))) { + is_exists = true; + LOG_WARN("adding column is exist", K(index_schema), K(part_key_col)); + } + return is_exists; +} + +int ObVecIndexBuilderUtil::construct_vid_col_name( + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos) +{ + int ret = OB_SUCCESS; + name_pos = 0; + if (OB_ISNULL(col_name_buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(col_name_buf)); + } else { + MEMSET(col_name_buf, 0, buf_len); + if (OB_FAIL(databuff_printf(col_name_buf, buf_len, name_pos, + OB_VEC_VID_COLUMN_NAME))) { + LOG_WARN("print generate column name failed", K(ret)); + } else if (OB_FAIL(databuff_printf(col_name_buf, buf_len, name_pos, "_%lu", ObTimeUtility::current_time()))){ + LOG_WARN("fail to printf current time", K(ret)); + } + } + return ret; +} + + +int ObVecIndexBuilderUtil::construct_type_col_name( + const ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos) +{ + int ret = OB_SUCCESS; + name_pos = 0; + if (OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_) || + !data_schema.is_valid() || + OB_ISNULL(col_name_buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), + KPC(index_arg), K(data_schema), K(col_name_buf)); + } else { + MEMSET(col_name_buf, 0, buf_len); + if (OB_FAIL(databuff_printf(col_name_buf, buf_len, name_pos, + OB_VEC_TYPE_COLUMN_NAME_PREFIX))) { + LOG_WARN("print generate column 
prefix name failed", K(ret)); + } + const ObColumnSchemaV2 *col_schema = NULL; + // 这里的index_arg->index_columns_表示的是向量索引列,构造辅助表列名时,需要加上索引列id + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(databuff_printf(col_name_buf, buf_len, name_pos, + "_%ld", + col_schema->get_column_id()))) { + LOG_WARN("print column id to buffer failed", K(ret), K(col_schema->get_column_id())); + } + } + if (FAILEDx(databuff_printf(col_name_buf, buf_len, name_pos, "_%lu", ObTimeUtility::current_time()))){ + LOG_WARN("fail to printf current time", K(ret)); + } + } + return ret; +} + + +int ObVecIndexBuilderUtil::construct_vector_col_name( + const ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos) +{ + int ret = OB_SUCCESS; + name_pos = 0; + if (OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_) || + !data_schema.is_valid() || + OB_ISNULL(col_name_buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), + K(col_name_buf)); + } else { + MEMSET(col_name_buf, 0, buf_len); + if (OB_FAIL(databuff_printf(col_name_buf, buf_len, name_pos, + OB_VEC_VECTOR_COLUMN_NAME_PREFIX))) { + LOG_WARN("print generate column prefix name failed", K(ret)); + } + const ObColumnSchemaV2 *col_schema = NULL; + // 这里的index_arg->index_columns_表示的是向量索引列,构造辅助表列名时,需要加上索引列id + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = 
index_arg->index_columns_.at(i).column_name_; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(databuff_printf(col_name_buf, buf_len, name_pos, + "_%ld", + col_schema->get_column_id()))) { + LOG_WARN("print column id to buffer failed", K(ret), K(col_schema->get_column_id())); + } + } + if (FAILEDx(databuff_printf(col_name_buf, buf_len, name_pos, "_%lu", ObTimeUtility::current_time()))){ + LOG_WARN("fail to printf current time", K(ret)); + } + } + return ret; +} + + +int ObVecIndexBuilderUtil::construct_scn_col_name( + const ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos) +{ + int ret = OB_SUCCESS; + name_pos = 0; + if (OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_) || + !data_schema.is_valid() || + OB_ISNULL(col_name_buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), + K(col_name_buf)); + } else { + MEMSET(col_name_buf, 0, buf_len); + if (OB_FAIL(databuff_printf(col_name_buf, buf_len, name_pos, + OB_VEC_SCN_COLUMN_NAME_PREFIX))) { + LOG_WARN("print generate column prefix name failed", K(ret)); + } + const ObColumnSchemaV2 *col_schema = NULL; + // 这里的index_arg->index_columns_表示的是向量索引列,构造辅助表列名时,需要加上索引列id + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = 
OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(databuff_printf(col_name_buf, buf_len, name_pos, + "_%ld", + col_schema->get_column_id()))) { + LOG_WARN("print column id to buffer failed", K(ret), K(col_schema->get_column_id())); + } + } + if (FAILEDx(databuff_printf(col_name_buf, buf_len, name_pos, "_%lu", ObTimeUtility::current_time()))){ + LOG_WARN("fail to printf current time", K(ret)); + } + } + return ret; +} + + +int ObVecIndexBuilderUtil::construct_key_col_name( + const ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos) +{ + int ret = OB_SUCCESS; + name_pos = 0; + if (OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_) || + !data_schema.is_valid() || + OB_ISNULL(col_name_buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), + K(col_name_buf)); + } else { + MEMSET(col_name_buf, 0, buf_len); + if (OB_FAIL(databuff_printf(col_name_buf, buf_len, name_pos, + OB_VEC_KEY_COLUMN_NAME_PREFIX))) { + LOG_WARN("print generate column prefix name failed", K(ret)); + } + const ObColumnSchemaV2 *col_schema = NULL; + // 这里的index_arg->index_columns_表示的是向量索引列,构造辅助表列名时,需要加上索引列id + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(databuff_printf(col_name_buf, buf_len, name_pos, + "_%ld", + col_schema->get_column_id()))) { + LOG_WARN("print column id to buffer 
failed", K(ret), K(col_schema->get_column_id())); + } + } + if (FAILEDx(databuff_printf(col_name_buf, buf_len, name_pos, "_%lu", ObTimeUtility::current_time()))){ + LOG_WARN("fail to printf current time", K(ret)); + } + } + return ret; +} + + +int ObVecIndexBuilderUtil::construct_data_col_name( + const ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos) +{ + int ret = OB_SUCCESS; + name_pos = 0; + if (OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_) || + !data_schema.is_valid() || + OB_ISNULL(col_name_buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema), + K(col_name_buf)); + } else { + MEMSET(col_name_buf, 0, buf_len); + if (OB_FAIL(databuff_printf(col_name_buf, buf_len, name_pos, + OB_VEC_DATA_COLUMN_NAME_PREFIX))) { + LOG_WARN("print generate column prefix name failed", K(ret)); + } + const ObColumnSchemaV2 *col_schema = NULL; + // 这里的index_arg->index_columns_表示的是向量索引列,构造辅助表列名时,需要加上索引列id + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(databuff_printf(col_name_buf, buf_len, name_pos, + "_%ld", + col_schema->get_column_id()))) { + LOG_WARN("print column id to buffer failed", K(ret), K(col_schema->get_column_id())); + } + } + if (FAILEDx(databuff_printf(col_name_buf, buf_len, name_pos, "_%lu", ObTimeUtility::current_time()))){ + LOG_WARN("fail to printf current time", K(ret)); + } + } + return ret; +} + +int 
ObVecIndexBuilderUtil::check_vec_cols( + const ObCreateIndexArg *index_arg, + ObTableSchema &data_schema) +{ + int ret = OB_SUCCESS; + ObColumnSchemaV2 *col_schema = NULL; + if (OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_) || + !data_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(index_arg), K(data_schema.is_valid())); + } + for (int64_t i = 0; OB_SUCC(ret) && i < index_arg->index_columns_.count(); ++i) { + const ObString &column_name = index_arg->index_columns_.at(i).column_name_; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), + column_name.ptr()); + } else if (!col_schema->is_collection()) { // vector index is collection column type + ret = OB_ERR_BAD_VEC_INDEX_COLUMN; + LOG_USER_ERROR(OB_ERR_BAD_VEC_INDEX_COLUMN, column_name.length(), column_name.ptr()); + } else { + col_schema->add_column_flag(GENERATED_DEPS_CASCADE_FLAG); + } + } + return ret; +} + +/* + 非共享的辅助表字段,一张表中只有唯一一个column +*/ +int ObVecIndexBuilderUtil::get_vec_vid_col( + const ObTableSchema &data_schema, + const ObColumnSchemaV2 *&vid_col) +{ + int ret = OB_SUCCESS; + vid_col = nullptr; + if (!data_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema)); + } else { + for (ObTableSchema::const_column_iterator iter = data_schema.column_begin(); + OB_SUCC(ret) && OB_ISNULL(vid_col) && iter != data_schema.column_end(); + iter++) { + const ObColumnSchemaV2 *column_schema = *iter; + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(data_schema)); + } else if (column_schema->is_vec_vid_column()) { + vid_col = column_schema; + } + } + } + return 
ret; +} + +/* + 共享辅助表中的column,由于一张主表上可能存在多个索引,有多个隐藏列,因此需要遍历查找 +*/ +int ObVecIndexBuilderUtil::get_vec_type_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&type_col) +{ + int ret = OB_SUCCESS; + schema::ColumnReferenceSet index_col_set; + type_col = nullptr; + if (!data_schema.is_valid() || + OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), KPC(index_arg)); + } else if (OB_FAIL(get_index_column_ids(data_schema, *index_arg, index_col_set))) { + LOG_WARN("fail to get index column ids", K(ret), K(data_schema), KPC(index_arg)); + } else { + for (ObTableSchema::const_column_iterator iter = data_schema.column_begin(); + OB_SUCC(ret) && OB_ISNULL(type_col) && iter != data_schema.column_end(); + iter++) { + const ObColumnSchemaV2 *column_schema = *iter; + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(data_schema)); + } else if (column_schema->is_vec_type_column()) { + bool is_match = false; + if (OB_FAIL(check_index_match(*column_schema, index_col_set, is_match))) { + LOG_WARN("fail to check index match", K(ret), KPC(column_schema), K(index_col_set)); + } else if (is_match) { + type_col = column_schema; + } + } + } + } + return ret; +} + + +int ObVecIndexBuilderUtil::get_vec_vector_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&vector_col) +{ + int ret = OB_SUCCESS; + schema::ColumnReferenceSet index_col_set; + vector_col = nullptr; + if (!data_schema.is_valid() || + OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), KPC(index_arg)); + } else if (OB_FAIL(get_index_column_ids(data_schema, *index_arg, index_col_set))) { + LOG_WARN("fail to get 
index column ids", K(ret), K(data_schema), KPC(index_arg)); + } else { + for (ObTableSchema::const_column_iterator iter = data_schema.column_begin(); + OB_SUCC(ret) && OB_ISNULL(vector_col) && iter != data_schema.column_end(); + iter++) { + const ObColumnSchemaV2 *column_schema = *iter; + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(data_schema)); + } else if (column_schema->is_vec_vector_column()) { + bool is_match = false; + if (OB_FAIL(check_index_match(*column_schema, index_col_set, is_match))) { + LOG_WARN("fail to check index match", K(ret), KPC(column_schema), K(index_col_set)); + } else if (is_match) { + vector_col = column_schema; + } + } + } + } + return ret; +} + + +int ObVecIndexBuilderUtil::get_vec_scn_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&scn_col) +{ + int ret = OB_SUCCESS; + schema::ColumnReferenceSet index_col_set; + scn_col = nullptr; + if (!data_schema.is_valid() || + OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), KPC(index_arg)); + } else if (OB_FAIL(get_index_column_ids(data_schema, *index_arg, index_col_set))) { + LOG_WARN("fail to get index column ids", K(ret), K(data_schema), KPC(index_arg)); + } else { + for (ObTableSchema::const_column_iterator iter = data_schema.column_begin(); + OB_SUCC(ret) && OB_ISNULL(scn_col) && iter != data_schema.column_end(); + iter++) { + const ObColumnSchemaV2 *column_schema = *iter; + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(data_schema)); + } else if (column_schema->is_vec_scn_column()) { + bool is_match = false; + if (OB_FAIL(check_index_match(*column_schema, index_col_set, is_match))) { + LOG_WARN("fail to check index match", K(ret), KPC(column_schema), 
K(index_col_set)); + } else if (is_match) { + scn_col = column_schema; + } + } + } + } + return ret; +} + +/* Returns in key_col the first column of data_schema flagged is_vec_key_column() that check_index_match() accepts for the index columns named in index_arg; key_col stays nullptr when none matches. An invalid data_schema, a null index_arg or a non-vector index type yields OB_INVALID_ARGUMENT. */ +int ObVecIndexBuilderUtil::get_vec_key_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&key_col) +{ + int ret = OB_SUCCESS; + schema::ColumnReferenceSet index_col_set; + key_col = nullptr; + if (!data_schema.is_valid() || + OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), KPC(index_arg)); + } else if (OB_FAIL(get_index_column_ids(data_schema, *index_arg, index_col_set))) { + LOG_WARN("fail to get index column ids", K(ret), K(data_schema), KPC(index_arg)); + } else { + for (ObTableSchema::const_column_iterator iter = data_schema.column_begin(); + OB_SUCC(ret) && OB_ISNULL(key_col) && iter != data_schema.column_end(); + iter++) { + const ObColumnSchemaV2 *column_schema = *iter; + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(data_schema)); + } else if (column_schema->is_vec_key_column()) { + bool is_match = false; + if (OB_FAIL(check_index_match(*column_schema, index_col_set, is_match))) { + LOG_WARN("fail to check index match", K(ret), KPC(column_schema), K(index_col_set)); + } else if (is_match) { + key_col = column_schema; + } + } + } + } + return ret; +} + +/* Same lookup as get_vec_key_col, but for the column flagged is_vec_data_column(); the result is returned in data_col. */ +int ObVecIndexBuilderUtil::get_vec_data_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&data_col) +{ + int ret = OB_SUCCESS; + schema::ColumnReferenceSet index_col_set; + data_col = nullptr; + if (!data_schema.is_valid() || + OB_ISNULL(index_arg) || + !share::schema::is_vec_index(index_arg->index_type_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), KPC(index_arg)); + } else if (OB_FAIL(get_index_column_ids(data_schema, *index_arg, 
index_col_set))) { + LOG_WARN("fail to get index column ids", K(ret), K(data_schema), KPC(index_arg)); + } else { + for (ObTableSchema::const_column_iterator iter = data_schema.column_begin(); + OB_SUCC(ret) && OB_ISNULL(data_col) && iter != data_schema.column_end(); + iter++) { + const ObColumnSchemaV2 *column_schema = *iter; + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(data_schema)); + } else if (column_schema->is_vec_data_column()) { + bool is_match = false; + if (OB_FAIL(check_index_match(*column_schema, index_col_set, is_match))) { + LOG_WARN("fail to check index match", K(ret), KPC(column_schema), K(index_col_set)); + } else if (is_match) { + data_col = column_schema; + } + } + } + } + return ret; +} +/* Resolves every column named in arg.index_columns_ against data_schema and adds its column id to index_column_ids (the set is not cleared first). Returns OB_INVALID_ARGUMENT for a non-vector index type or an invalid data_schema, OB_ERR_UNEXPECTED for an empty column name, and OB_ERR_KEY_COLUMN_DOES_NOT_EXITS (also reported as a user error) when a named column is not found. */ +int ObVecIndexBuilderUtil::get_index_column_ids( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg &arg, + schema::ColumnReferenceSet &index_column_ids) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!share::schema::is_vec_index(arg.index_type_) || !data_schema.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(arg), K(data_schema)); + } else { + const ObColumnSchemaV2 *col_schema = nullptr; + for (int64_t i = 0; OB_SUCC(ret) && i < arg.index_columns_.count(); ++i) { + const ObString &column_name = arg.index_columns_.at(i).column_name_; + if (column_name.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column name is empty", K(ret), K(column_name)); + } else if (OB_ISNULL(col_schema = data_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), column_name.ptr()); + } else if (OB_FAIL(index_column_ids.add_member(col_schema->get_column_id()))) { + LOG_WARN("fail to add index column id", K(ret), K(col_schema->get_column_id())); + } + } + } + return ret; +} +int ObVecIndexBuilderUtil::check_index_match( + const 
schema::ObColumnSchemaV2 &column, + const schema::ColumnReferenceSet &index_column_ids, + bool &is_match) +{ /* Sets is_match to true only when the cascaded column ids of column are exactly the ids in index_column_ids: the counts must be equal and every cascaded id must be a member. An invalid column or an empty id set yields OB_INVALID_ARGUMENT. */ + int ret = OB_SUCCESS; + ObSEArray cascaded_col_ids; + is_match = false; + if (OB_UNLIKELY(!column.is_valid() || index_column_ids.is_empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(column), K(index_column_ids)); + } else if (OB_FAIL(column.get_cascaded_column_ids(cascaded_col_ids))) { + LOG_WARN("fail to get cascaded column ids", K(ret), K(column)); + } else if (cascaded_col_ids.count() == index_column_ids.num_members()) { + bool mismatch = false; + for (int64_t i = 0; !mismatch && i < cascaded_col_ids.count(); ++i) { + if (!index_column_ids.has_member(cascaded_col_ids.at(i))) { + mismatch = true; + } + } + is_match = !mismatch; + } + return ret; +} + +/* Checks whether the generated column named col_name_buf already exists in data_schema and agrees with col_id. col_exists becomes true when a column of that name exists with the same id. OB_ERR_INVALID_COLUMN_ID is returned when the name exists under a different id, or when the name is absent but col_id is already taken by another column. */ +int ObVecIndexBuilderUtil::check_vec_gen_col( + const ObTableSchema &data_schema, + const uint64_t col_id, + const char *col_name_buf, + const int64_t name_pos, + bool &col_exists) +{ + int ret = OB_SUCCESS; + col_exists = false; + if (!data_schema.is_valid() || + OB_INVALID_ID == col_id || + OB_ISNULL(col_name_buf) || + name_pos < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(data_schema), K(col_id), + KP(col_name_buf), K(name_pos)); + } else { + // another vector index could have created the generated column + const ObColumnSchemaV2 *vec_col = data_schema.get_column_schema(col_name_buf); + if (OB_NOT_NULL(vec_col) && vec_col->get_column_id() != col_id) { + // check the specified column id is consistent with the existed column schema + ret = OB_ERR_INVALID_COLUMN_ID; + LOG_USER_ERROR(OB_ERR_INVALID_COLUMN_ID, static_cast(name_pos), + col_name_buf); + LOG_WARN("Column id specified by create vector index mismatch " + "with column schema id", K(ret), K(col_id), K(*vec_col)); + } else if (OB_ISNULL(vec_col) && OB_NOT_NULL(data_schema.get_column_schema(col_id))) { + // check the specified column id is not used by others + ret = 
OB_ERR_INVALID_COLUMN_ID; + LOG_USER_ERROR(OB_ERR_INVALID_COLUMN_ID, static_cast(name_pos), + col_name_buf); + LOG_WARN("Column id specified by create vector index has been used", + K(ret), K(col_id)); + } + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_NOT_NULL(vec_col)) { + // the generated column is created + col_exists = true; + } else { + col_exists = false; + } + } + return ret; +} + +/* + Gets the table_schema of table no. 3/4/5 by index name and index type: generate_vec_index_name() derives the full index name, which is then resolved through schema_guard.get_table_schema() as a built-in index table. When no schema is found the function only logs and still returns OB_SUCCESS with index_schema null, so callers must check index_schema. +*/ +int ObVecIndexBuilderUtil::get_vec_table_schema_by_name( + share::schema::ObSchemaGetterGuard &schema_guard, + const int64_t tenant_id, + const int64_t database_id, + const ObString &index_name, /* domain index name */ + const share::schema::ObIndexType index_type, + ObIAllocator *allocator, + const ObTableSchema *&index_schema) +{ + int ret = OB_SUCCESS; + ObString full_index_name; + if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id || OB_INVALID_ID == database_id || + index_name.empty() || OB_ISNULL(allocator))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(tenant_id), K(database_id), K(index_name), KP(allocator)); + } else if (OB_FAIL(generate_vec_index_name(allocator, + index_type, + index_name, + full_index_name))) { + LOG_WARN("fail to generate vec index name", K(ret), K(index_type)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, + database_id, + full_index_name, + true, /* is_index */ + index_schema, + false, /* is_hidden_flag */ + true/* is_built_in_flag */))) { + LOG_WARN("fail to get table schema", + K(ret), K(tenant_id), K(database_id), K(index_name), K(full_index_name), K(index_type)); + } else if (OB_ISNULL(index_schema)) { + LOG_INFO("get vec table schema is null, maybe index has been drop", K(ret), K(full_index_name)); + } + return ret; +} + + +}//end namespace share +}//end namespace oceanbase diff --git a/src/share/ob_vec_index_builder_util.h b/src/share/ob_vec_index_builder_util.h new file mode 100644 index 0000000000..a5bc584bf5 --- /dev/null +++ 
b/src/share/ob_vec_index_builder_util.h @@ -0,0 +1,237 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_SHARE_VEC_INDEX_BUILDER_UTIL_H_ +#define OCEANBASE_SHARE_VEC_INDEX_BUILDER_UTIL_H_ + +#include "share/ob_rpc_struct.h" +#include "share/schema/ob_schema_struct.h" +#include "share/vector_index/ob_vector_index_util.h" +#include "sql/resolver/ob_schema_checker.h" +#include "sql/session/ob_sql_session_info.h" + +namespace oceanbase +{ +namespace share +{ +/* Static-only utility class (every member declared here is static). Judging by the member names it groups the helpers for the vector index auxiliary tables: building ObCreateIndexArg entries, filling index table schemas and locating the hidden generated columns. */ +class ObVecIndexBuilderUtil +{ +public: + static const int64_t OB_VEC_DELTA_BUFFER_TABLE_INDEX_COL_CNT = 2; // number of primary key columns of the auxiliary table + static const int64_t OB_VEC_INDEX_ID_TABLE_INDEX_COL_CNT = 3; // number of primary key columns of the auxiliary table + static const int64_t OB_VEC_INDEX_SNAPSHOT_DATA_TABLE_INDEX_COL_CNT = 1; // number of primary key columns of the auxiliary table + static const char * ROWKEY_VID_TABLE_NAME; + static const char * VID_ROWKEY_TABLE_NAME; + static const char * DELTA_BUFFER_TABLE_NAME_SUFFIX; + static const char * INDEX_ID_TABLE_NAME_SUFFIX; + static const char * SNAPSHOT_DATA_TABLE_NAME_SUFFIX; + +public: + static int append_vec_rowkey_vid_arg( + const obrpc::ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list); + static int append_vec_vid_rowkey_arg( + const obrpc::ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list); + static int append_vec_delta_buffer_arg( + const obrpc::ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + const sql::ObSQLSessionInfo *session_info, + ObIArray &index_arg_list); + static int 
append_vec_index_id_arg( + const obrpc::ObCreateIndexArg &index_arg, + ObIAllocator *allocator, + ObIArray &index_arg_list); + static int append_vec_index_snapshot_data_arg( + const obrpc::ObCreateIndexArg &arg, + ObIAllocator *allocator, + ObIArray &index_arg_list); + + static int check_vec_index_allowed( + ObTableSchema &data_schema); + + static int adjust_vec_args( + obrpc::ObCreateIndexArg &index_arg, + ObTableSchema &data_schema, + ObIAllocator &allocator, + ObIArray &gen_columns); + static int set_vec_rowkey_vid_table_columns( + const obrpc::ObCreateIndexArg &arg, + const share::schema::ObTableSchema &data_schema, + share::schema::ObTableSchema &index_schema); + static int set_vec_vid_rowkey_table_columns( + const obrpc::ObCreateIndexArg &arg, + const share::schema::ObTableSchema &data_schema, + share::schema::ObTableSchema &index_schema); + static int set_vec_delta_buffer_table_columns( + const obrpc::ObCreateIndexArg &arg, + const share::schema::ObTableSchema &data_schema, + share::schema::ObTableSchema &index_schema); + static int set_vec_index_id_table_columns( + const obrpc::ObCreateIndexArg &arg, + const share::schema::ObTableSchema &data_schema, + share::schema::ObTableSchema &index_schema); + static int set_vec_index_snapshot_data_table_columns( + const obrpc::ObCreateIndexArg &arg, + const share::schema::ObTableSchema &data_schema, + share::schema::ObTableSchema &index_schema); + static int generate_vec_index_name( + common::ObIAllocator *allocator, + const share::schema::ObIndexType type, + const ObString &index_name, + ObString &new_index_name); + static int get_vec_table_schema_by_name( + share::schema::ObSchemaGetterGuard &schema_guard, + const int64_t tenant_id, + const int64_t database_id, + const ObString &index_name, /* domain index name */ + const share::schema::ObIndexType index_type, + ObIAllocator *allocator, + const ObTableSchema *&index_schema); + static int get_vector_index_prefix( + const ObTableSchema &index_schema, + ObString 
&prefix); +private: + static int check_vec_cols( + const obrpc::ObCreateIndexArg *index_arg, + ObTableSchema &data_schema); + static int get_vec_vid_col( + const ObTableSchema &data_schema, + const ObColumnSchemaV2 *&vid_col); + static int get_vec_type_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&type_col); + static int get_vec_vector_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&vector_col); + static int get_vec_scn_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&scn_col); + static int get_vec_key_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&key_col); + static int get_vec_data_col( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg *index_arg, + const ObColumnSchemaV2 *&data_col); + static int check_index_match( + const schema::ObColumnSchemaV2 &column, + const schema::ColumnReferenceSet &index_column_ids, + bool &is_match); + static int push_back_gen_col( + ObIArray &cols, + const ObColumnSchemaV2 *existing_col, + ObColumnSchemaV2 *generated_col); + static int generate_vid_column( + const obrpc::ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, + ObColumnSchemaV2 *&vid_col); + static int generate_type_column( + const obrpc::ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, + ObColumnSchemaV2 *&type_col); + static int generate_vector_column( + const obrpc::ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, + ObColumnSchemaV2 *&vector_col); + static int generate_scn_column( + const obrpc::ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, + ObColumnSchemaV2 *&scn_col); + static int generate_key_column( + const obrpc::ObCreateIndexArg *index_arg, + const 
uint64_t col_id, + ObTableSchema &data_schema, + ObColumnSchemaV2 *&key_col); + static int generate_data_column( + const obrpc::ObCreateIndexArg *index_arg, + const uint64_t col_id, + ObTableSchema &data_schema, + ObColumnSchemaV2 *&data_col); + static int construct_vid_col_name( + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos); + static int construct_type_col_name( + const obrpc::ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos); + static int construct_vector_col_name( + const obrpc::ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos); + static int construct_scn_col_name( + const obrpc::ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos); + static int construct_key_col_name( + const obrpc::ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos); + static int construct_data_col_name( + const obrpc::ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + char *col_name_buf, + const int64_t buf_len, + int64_t &name_pos); + static int adjust_vec_arg( + obrpc::ObCreateIndexArg *index_arg, + const ObTableSchema &data_schema, + ObIAllocator &allocator, + const ObIArray &vec_cols); + static int inner_adjust_vec_arg( + obrpc::ObCreateIndexArg *vec_arg, + const ObIArray &vec_cols, + const int index_column_cnt, // number of primary key columns of the auxiliary table + ObIAllocator *allocator); + static int check_vec_gen_col( + const ObTableSchema &data_schema, + const uint64_t col_id, + const char *col_name_buf, + const int64_t name_pos, + bool &col_exists); /* col_exists: a column named col_name_buf already exists with id col_id; a name/id conflict yields OB_ERR_INVALID_COLUMN_ID */ + static int get_index_column_ids( + const ObTableSchema &data_schema, + const obrpc::ObCreateIndexArg &arg, + schema::ColumnReferenceSet &index_column_ids); /* adds the ids of the columns named in arg.index_columns_ */ + static bool is_part_key_column_exist( + const 
ObTableSchema &index_schema, + const ObColumnSchemaV2 &part_key_col); + static int set_part_key_columns( + const ObTableSchema &data_schema, + ObTableSchema &index_schema); +}; + + +}//end namespace share +}//end namespace oceanbase + +#endif //OCEANBASE_SHARE_VEC_INDEX_BUILDER_UTIL_H_ diff --git a/src/share/object/ob_obj_cast.cpp b/src/share/object/ob_obj_cast.cpp index 9d9cc9be6b..1f1f722148 100644 --- a/src/share/object/ob_obj_cast.cpp +++ b/src/share/object/ob_obj_cast.cpp @@ -33,6 +33,8 @@ #include "share/ob_lob_access_utils.h" #include "sql/engine/expr/ob_expr_lob_utils.h" #include "sql/engine/expr/ob_expr_sql_udt_utils.h" +#include "sql/engine/expr/ob_array_expr_utils.h" +#include "sql/engine/expr/ob_array_cast.h" #include "sql/engine/ob_exec_context.h" #include "lib/charset/ob_charset.h" #include "lib/geo/ob_geometry_cast.h" @@ -9556,6 +9558,45 @@ static int pl_extend_sql_udt(const ObObjType expect_type, ObObjCastParams ¶m return ret; } +static int string_collection(const ObObjType expect_type, ObObjCastParams ¶ms, + const ObObj &in, ObObj &out, const ObCastMode cast_mode) +{ /* Casts a string value to a collection (array) value. A null input yields a null output. Otherwise the destination collection meta is fetched through params.exec_ctx_ using the subschema id carried by the meta of out; an array object is constructed, filled by ObArrayCastUtils::string_cast, checked with check_validity and stored into out by ObArrayExprUtils::set_array_obj_res. */ + int ret = OB_SUCCESS; + if (in.is_null()) { + out.set_null(); + } else { + const uint16_t dst_subschema_id = out.get_meta().get_subschema_id(); + ObSubSchemaValue dst_meta; + if (OB_ISNULL(params.exec_ctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("exec ctx is null", K(ret), K(lbt())); + } else if (OB_FAIL(params.exec_ctx_->get_sqludt_meta_by_subschema_id(dst_subschema_id, dst_meta))) { + LOG_WARN("Failed to get subshcema_meta_info", K(ret), K(dst_subschema_id)); + } else { + ObString in_str = in.get_string(); + ObIAllocator &temp_allocator = *params.allocator_v2_; /* NOTE(review): allocator_v2_ is dereferenced without a null check, unlike exec_ctx_ above - confirm callers always provide it */ + ObIArrayType *arr_dst = NULL; + ObString res_str; /* NOTE(review): res_str is not used anywhere in this function */ + const ObSqlCollectionInfo *dst_coll_info = reinterpret_cast(dst_meta.value_); + ObCollectionArrayType *dst_arr_type = static_cast(dst_coll_info->collection_meta_); /* NOTE(review): dst_meta.value_ and collection_meta_ are dereferenced without null checks - confirm the subschema lookup guarantees both */ + if (OB_FAIL(ObArrayTypeObjFactory::construct(temp_allocator, *dst_arr_type, arr_dst))) { + 
LOG_WARN("construct array obj failed", K(ret), K(dst_coll_info)); + } else if (OB_FAIL(ObArrayCastUtils::string_cast(temp_allocator, in_str, arr_dst, dst_arr_type->element_type_))) { + LOG_WARN("array element cast failed", K(ret), K(dst_coll_info)); + } else if (OB_FAIL(arr_dst->check_validity(*dst_arr_type, *arr_dst))) { + LOG_WARN("check array validty failed", K(ret), K(dst_coll_info)); + if (ret == OB_ERR_INVALID_VECTOR_DIM) { + LOG_USER_ERROR(OB_ERR_INVALID_VECTOR_DIM, static_cast(dst_arr_type->dim_cnt_), arr_dst->size()); + } + } else if (OB_FAIL(ObArrayExprUtils::set_array_obj_res(arr_dst, ¶ms, &out))) { + LOG_WARN("get array binary string failed", K(ret), K(dst_coll_info)); + } + } + } + + return ret; +} + static int pl_extend_geometry(const ObObjType expect_type, ObObjCastParams ¶ms, const ObObj &in, ObObj &out, const ObCastMode cast_mode) { @@ -10785,7 +10826,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = cast_identity,/*geometry*/ cast_not_expected,/*udt, mysql mode does not have udt*/ cast_identity,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_identity,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_identity,/*roaringbitmap*/ @@ -10818,7 +10859,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = int_geometry,/*geometry*/ cast_not_expected,/*udt*/ int_decimalint,/*decimal int*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -10851,7 +10892,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = uint_geometry,/*geometry*/ cast_not_expected,/*udt*/ uint_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -10884,7 +10925,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = float_geometry,/*geometry*/ cast_not_expected,/*udt*/ 
float_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -10917,7 +10958,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = double_geometry,/*geometry*/ cast_not_expected,/*udt*/ double_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -10950,7 +10991,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = number_geometry,/*geometry*/ cast_not_expected,/*udt*/ number_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -10983,7 +11024,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = datetime_geometry,/*geometry*/ cast_not_expected,/*udt*/ datetime_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -11016,7 +11057,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = date_geometry,/*geometry*/ cast_not_expected,/*udt*/ date_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -11049,7 +11090,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = time_geometry,/*geometry*/ cast_not_expected,/*udt*/ time_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -11082,7 +11123,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = year_geometry,/*geometry*/ cast_not_expected,/*udt*/ 
year_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -11115,7 +11156,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = string_geometry,/*geometry*/ cast_not_expected,/*udt*/ string_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + string_collection,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ string_roaringbitmap,/*roaringbitmap*/ @@ -11148,7 +11189,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = cast_not_support,/*geometry*/ pl_extend_sql_udt,/*udt*/ cast_not_support,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -11181,7 +11222,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = cast_not_support,/*geometry*/ cast_not_expected,/*udt*/ unknown_other,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -11214,7 +11255,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = string_geometry,/*geometry*/ cast_not_expected,/*udt*/ text_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + string_collection,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ string_roaringbitmap,/*roaringbitmap*/ @@ -11247,7 +11288,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = bit_geometry,/*geometry*/ cast_not_expected,/*udt*/ bit_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -11280,7 +11321,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = cast_not_expected,/*geometry*/ cast_not_expected,/*udt*/ 
enumset_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -11313,7 +11354,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = cast_not_support,/*geometry*/ cast_not_expected,/*udt*/ enumset_inner_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -11478,7 +11519,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = lob_geometry,/*geometry*/ cast_not_expected,/*udt*/ lob_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -11511,7 +11552,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = json_geometry,/*geometry*/ cast_not_expected,/*udt*/ json_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -11544,7 +11585,7 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = geometry_geometry,/*geometry*/ cast_not_expected,/*udt*/ geometry_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -11610,39 +11651,39 @@ ObObjCastFunc OB_OBJ_CAST[ObMaxTC][ObMaxTC] = decimalint_geometry,/*geometry*/ cast_not_expected, /*udt*/ decimalint_decimalint,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ }, { /*collection-> xxx*/ - cast_not_expected,/*null*/ - cast_not_expected,/*int*/ - cast_not_expected,/*uint*/ - 
cast_not_expected,/*float*/ - cast_not_expected,/*double*/ - cast_not_expected,/*number*/ - cast_not_expected,/*datetime*/ - cast_not_expected,/*date*/ - cast_not_expected,/*time*/ - cast_not_expected,/*year*/ - cast_not_expected,/*string*/ - cast_not_expected,/*extend*/ - cast_not_expected,/*unknown*/ - cast_not_expected,/*text*/ - cast_not_expected,/*bit*/ + cast_not_support,/*null*/ + cast_not_support,/*int*/ + cast_not_support,/*uint*/ + cast_not_support,/*float*/ + cast_not_support,/*double*/ + cast_not_support,/*number*/ + cast_not_support,/*datetime*/ + cast_not_support,/*date*/ + cast_not_support,/*time*/ + cast_not_support,/*year*/ + cast_not_support,/*string*/ + cast_not_support,/*extend*/ + cast_not_support,/*unknown*/ + cast_not_support,/*text*/ + cast_not_support,/*bit*/ cast_not_expected,/*enumset*/ cast_not_expected,/*enumset_inner*/ - cast_not_expected,/*otimestamp*/ - cast_not_expected,/*raw*/ + cast_not_support,/*otimestamp*/ + cast_not_support,/*raw*/ cast_not_expected,/*interval*/ cast_not_expected,/*rowid*/ cast_not_expected,/*lob*/ - cast_not_expected,/*json*/ - cast_not_expected,/*geometry*/ + cast_not_support,/*json*/ + cast_not_support,/*geometry*/ cast_not_expected, /*udt*/ - cast_not_expected,/*decimalint*/ + cast_not_support,/*decimalint*/ cast_not_expected,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ diff --git a/src/share/parameter/ob_parameter_seed.ipp b/src/share/parameter/ob_parameter_seed.ipp index 76458c4833..43c1f45030 100644 --- a/src/share/parameter/ob_parameter_seed.ipp +++ b/src/share/parameter/ob_parameter_seed.ipp @@ -2103,3 +2103,7 @@ DEF_BOOL(_enable_check_trigger_const_variables_assign, OB_TENANT_PARAMETER, "Tru DEF_BOOL(_enable_unit_gc_wait, OB_CLUSTER_PARAMETER, "True", "Used to control enable or disable the unit smooth gc feature, enabled by default.", ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); +DEF_INT(ob_vector_memory_limit_percentage, 
OB_TENANT_PARAMETER, "0", + "[0,100)", + "Used to control the upper limit percentage of memory resources that the vector_index module can use. Range:[0, 100)", + ObParameterAttr(Section::TENANT, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); diff --git a/src/share/rc/ob_tenant_base.h b/src/share/rc/ob_tenant_base.h index cb2017fc72..eacecb9094 100755 --- a/src/share/rc/ob_tenant_base.h +++ b/src/share/rc/ob_tenant_base.h @@ -225,6 +225,7 @@ class ObTenantErrsimEventMgr; class ObSharedMemAllocMgr; class ObIndexUsageInfoMgr; class ObResourceLimitCalculator; +class ObPluginVectorIndexService; namespace schema { class ObTenantSchemaService; @@ -375,6 +376,7 @@ using ObTableScanIteratorObjPool = common::ObServerObjectPool 0 && is_hidden(); } inline bool is_udt_related_column(bool is_oracle_mode) const { return is_extend() || is_udt_hidden_column() || @@ -266,6 +268,14 @@ int assign(const ObColumnSchemaV2 &src_schema); del_column_flag(DEFAULT_IDENTITY_COLUMN_FLAG); del_column_flag(DEFAULT_ON_NULL_IDENTITY_COLUMN_FLAG); } + /* vector index */ + inline bool is_vec_index_column() const { return ObSchemaUtils::is_vec_index_column(column_flags_); } + inline bool is_vec_vid_column() const { return ObSchemaUtils::is_vec_vid_column(column_flags_); } + inline bool is_vec_type_column() const { return ObSchemaUtils::is_vec_type_column(column_flags_); } + inline bool is_vec_vector_column() const { return ObSchemaUtils::is_vec_vector_column(column_flags_); } + inline bool is_vec_scn_column() const { return ObSchemaUtils::is_vec_scn_column(column_flags_); } + inline bool is_vec_key_column() const { return ObSchemaUtils::is_vec_key_column(column_flags_); } + inline bool is_vec_data_column() const { return ObSchemaUtils::is_vec_data_column(column_flags_); } inline bool is_fulltext_column() const { return ObSchemaUtils::is_fulltext_column(column_flags_); } inline bool is_doc_id_column() const { return ObSchemaUtils::is_doc_id_column(column_flags_); } inline bool 
is_word_segment_column() const { return ObSchemaUtils::is_word_segment_column(column_flags_); } @@ -340,7 +350,7 @@ int assign(const ObColumnSchemaV2 &src_schema); int get_each_column_group_name(ObString &cg_name) const; inline sql::ObLocalSessionVar &get_local_session_var() { return local_session_vars_; } inline const sql::ObLocalSessionVar &get_local_session_var() const { return local_session_vars_; } - + int is_same_collection_column(const ObColumnSchemaV2 &other, bool &is_same) const; DECLARE_VIRTUAL_TO_STRING; private: int alloc_column_ref_set(); diff --git a/src/share/schema/ob_schema_getter_guard.cpp b/src/share/schema/ob_schema_getter_guard.cpp index 6d6ffe6e45..92aabc065c 100644 --- a/src/share/schema/ob_schema_getter_guard.cpp +++ b/src/share/schema/ob_schema_getter_guard.cpp @@ -230,7 +230,8 @@ int ObSchemaGetterGuard::get_can_read_index_array( bool with_mv, bool with_global_index /* =true */, bool with_domain_index /*=true*/, - bool with_spatial_index /*=true*/) + bool with_spatial_index /*=true*/, + bool with_vector_index /*=true*/) { int ret = OB_SUCCESS; const ObTableSchema *table_schema = NULL; @@ -277,6 +278,8 @@ int ObSchemaGetterGuard::get_can_read_index_array( // does not need domain index, skip it } else if (!with_spatial_index && index_schema->is_spatial_index() && is_geo_default_srid) { // skip spatial index when geometry column has not specific srid. 
+ } else if (!with_vector_index && index_schema->is_vec_index()) { + // skip vector index } else if (index_schema->can_read_index() && index_schema->is_index_visible()) { index_tid_array[can_read_count++] = simple_index_infos.at(i).table_id_; } else { diff --git a/src/share/schema/ob_schema_getter_guard.h b/src/share/schema/ob_schema_getter_guard.h index c3d301b148..98b8c77274 100644 --- a/src/share/schema/ob_schema_getter_guard.h +++ b/src/share/schema/ob_schema_getter_guard.h @@ -163,7 +163,8 @@ public: bool with_mv, bool with_global_index = true, bool with_domain_index = true, - bool with_spatial_index = true); + bool with_spatial_index = true, + bool with_vector_index = true); int get_table_mlog_schema(const uint64_t tenant_id, const uint64_t data_table_id, const ObTableSchema *&mlog_schema); diff --git a/src/share/schema/ob_schema_mgr.cpp b/src/share/schema/ob_schema_mgr.cpp index e3ff5c332e..a738359017 100644 --- a/src/share/schema/ob_schema_mgr.cpp +++ b/src/share/schema/ob_schema_mgr.cpp @@ -2692,7 +2692,7 @@ int ObSchemaMgr::add_table( "table_name", new_table_schema->get_table_name()); } } else if (new_table_schema->is_index_table()) { // index is in recyclebin - const bool is_built_in_index = new_table_schema->is_built_in_fts_index(); + const bool is_built_in_index = new_table_schema->is_built_in_index(); IndexNameMap &index_name_map = get_index_name_map_(is_built_in_index); if (new_table_schema->is_in_recyclebin()) { ObIndexSchemaHashWrapper index_name_wrapper(new_table_schema->get_tenant_id(), @@ -3430,7 +3430,7 @@ int ObSchemaMgr::del_table(const ObTenantTableId table) ret = OB_HASH_NOT_EXIST != hash_ret ? 
hash_ret : ret; } } else if (schema_to_del->is_index_table()) { - const bool is_built_in_index = schema_to_del->is_built_in_fts_index(); + const bool is_built_in_index = schema_to_del->is_built_in_index(); IndexNameMap &index_name_map = get_index_name_map_(is_built_in_index); if (schema_to_del->is_in_recyclebin()) { // index is in recyclebin ObIndexSchemaHashWrapper index_schema_wrapper(schema_to_del->get_tenant_id(), @@ -4735,7 +4735,7 @@ int ObSchemaMgr::deal_with_change_table_state(const ObSimpleTableSchemaV2 &old_t // non-hidden table to hidden table if (old_table_schema.is_index_table()) { bool is_oracle_mode = false; - const bool is_built_in_index = old_table_schema.is_built_in_fts_index(); + const bool is_built_in_index = old_table_schema.is_built_in_index(); IndexNameMap &index_name_map = get_index_name_map_(is_built_in_index); if (OB_FAIL(old_table_schema.check_if_oracle_compat_mode(is_oracle_mode))) { LOG_WARN("fail to check if tenant mode is oracle mode", K(ret)); @@ -4835,7 +4835,7 @@ int ObSchemaMgr::deal_with_table_rename( K(new_table_name)); bool is_system_table = false; if (old_table_schema.is_index_table()) { - const bool is_built_in_index = old_table_schema.is_built_in_fts_index(); + const bool is_built_in_index = old_table_schema.is_built_in_index(); bool is_oracle_mode = false; IndexNameMap &index_name_map = get_index_name_map_(is_built_in_index); if (OB_FAIL(old_table_schema.check_if_oracle_compat_mode(is_oracle_mode))) { @@ -5028,7 +5028,7 @@ int ObSchemaMgr::rebuild_table_hashmap(uint64_t &fk_cnt, uint64_t &cst_cnt) LOG_TRACE("index is", "table_id", table_schema->get_table_id(), "database_id", table_schema->get_database_id(), "table_name", table_schema->get_table_name_str()); - const bool is_built_in_index = table_schema->is_built_in_fts_index(); + const bool is_built_in_index = table_schema->is_built_in_index(); IndexNameMap &index_name_map = get_index_name_map_(is_built_in_index); // oracle mode and index is not in recyclebin if 
(table_schema->is_in_recyclebin()) { diff --git a/src/share/schema/ob_schema_printer.cpp b/src/share/schema/ob_schema_printer.cpp index 58ae35dd1a..a81777aa7c 100644 --- a/src/share/schema/ob_schema_printer.cpp +++ b/src/share/schema/ob_schema_printer.cpp @@ -40,7 +40,6 @@ #include "share/schema/ob_mview_info.h" - namespace oceanbase { namespace share @@ -505,7 +504,9 @@ int ObSchemaPrinter::print_generated_column_definition(const ObColumnSchemaV2 &g sql::ObRawExpr *expr = NULL; ObTimeZoneInfo tz_infos; sql::ObRawExprPrinter raw_printer(buf, buf_len, &pos, &schema_guard_, &tz_infos); - SMART_VAR(sql::ObSQLSessionInfo, session) { + SMART_VARS_3((sql::ObSQLSessionInfo, session), (ObExecContext, exec_ctx, allocator), + (ObPhysicalPlanCtx, phy_plan_ctx, allocator)) { + LinkExecCtxGuard link_guard(session, exec_ctx); if (OB_FAIL(databuff_printf(buf, buf_len, pos, " GENERATED ALWAYS AS ("))) { SHARE_SCHEMA_LOG(WARN, "fail to print keywords", K(ret)); } else if (OB_FAIL(gen_col.get_cur_default_value().get_string(expr_str))) { @@ -519,6 +520,8 @@ int ObSchemaPrinter::print_generated_column_definition(const ObColumnSchemaV2 &g /* bug: 构建ObRawExpr对象,当 expr_str = "CONCAT(first_name,' ',last_name)" 避免错误的打印成: CONCAT(first_name,\' \',last_name) */ + } else if (FALSE_IT(exec_ctx.set_physical_plan_ctx(&phy_plan_ctx))) { + } else if (FALSE_IT(exec_ctx.set_my_session(&session))) { } else if(OB_FAIL(sql::ObRawExprUtils::build_generated_column_expr(NULL, expr_str, expr_factory, @@ -535,6 +538,7 @@ int ObSchemaPrinter::print_generated_column_definition(const ObColumnSchemaV2 &g " VIRTUAL" : " STORED"))) { SHARE_SCHEMA_LOG(WARN, "print virtual keyword failed", K(ret)); } + exec_ctx.set_physical_plan_ctx(NULL); } return ret; } @@ -632,6 +636,10 @@ int ObSchemaPrinter::print_single_index_definition(const ObTableSchema *index_sc SHARE_SCHEMA_LOG(WARN, "fail to print UNIQUE KEY", K(ret)); } } + } else if (index_schema->is_vec_index()) { + if (OB_FAIL(databuff_printf(buf, buf_len, pos, " 
VECTOR KEY "))) { + SHARE_SCHEMA_LOG(WARN, "fail to print VECTOR KEY", K(ret)); + } } else if (index_schema->is_fts_index()) { if (OB_FAIL(databuff_printf(buf, buf_len, pos, " FULLTEXT KEY "))) { SHARE_SCHEMA_LOG(WARN, "fail to print FULLTEXT KEY", K(ret)); @@ -677,6 +685,8 @@ int ObSchemaPrinter::print_single_index_definition(const ObTableSchema *index_sc // skip doc id for fts index. } else if (index_schema->is_multivalue_index_aux() && col->is_doc_id_column()) { // skip doc id for multivalue index. + } else if (index_schema->is_vec_index() && (col->is_vec_vid_column())) { + // only need vec_type column to show index key, here skip vec_vid column of delta_buffer_table rowkey column } else if (!col->is_shadow_column()) { const ObColumnSchemaV2 *tmp_column = NULL; if (index_schema->is_multivalue_index_aux() && @@ -712,7 +722,7 @@ int ObSchemaPrinter::print_single_index_definition(const ObTableSchema *index_sc } // show storing columns in index if (OB_SUCC(ret) && !strict_compat_ && !is_no_key_options(sql_mode) - && !index_schema->is_fts_index() && !index_schema->is_multivalue_index()) { + && !index_schema->is_fts_index() && !index_schema->is_multivalue_index() && !index_schema->is_vec_index()) { int64_t column_count = index_schema->get_column_count(); if (column_count >= rowkey_count) { bool first_storing_column = true; @@ -824,8 +834,10 @@ int ObSchemaPrinter::print_table_definition_indexes(const ObTableSchema &table_s index_schema->is_global_local_index_table())) { // For strictly compatible with MySQL, // Do not print global index. - } else if (index_schema->is_built_in_fts_index()) { - // For full-text search index, only inverted table can be printed, and others table will not be printed. + } else if (index_schema->is_built_in_index()) { + // For full-text or vector index search index, only inverted table can be printed, and others table will not be printed. 
+ } else if (index_schema->is_vec_index() && index_schema->get_index_status() != INDEX_STATUS_AVAILABLE) { + // Not show vec index which in unavaliable status } else if (OB_FAIL(print_single_index_definition(index_schema, table_schema, arena_allocator, buf, buf_len, pos, is_unique_index, is_oracle_mode, false, sql_mode, tz_info))) { LOG_WARN("print single index definition failed", K(ret)); @@ -944,6 +956,50 @@ int ObSchemaPrinter::print_table_definition_constraints(const ObTableSchema &tab return ret; } +int ObSchemaPrinter::print_vector_index_column(const ObTableSchema &table_schema, + const ObColumnSchemaV2 &column, + bool is_last, + char *buf, + int64_t buf_len, + int64_t &pos) const +{ + int ret = OB_SUCCESS; + + ObArray ctxcat_ids; + ObArenaAllocator allocator(ObModIds::OB_SCHEMA); + const ObColumnSchemaV2 *table_column = table_schema.get_column_schema(column.get_column_id()); + bool is_oracle_mode = false; + if (OB_FAIL(table_schema.check_if_oracle_compat_mode(is_oracle_mode))) { + LOG_WARN("fail to check oracle mode", KR(ret), K(table_schema)); + } else if (OB_ISNULL(table_column)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "The column schema is NULL, ", K(ret)); + } else if (OB_FAIL(table_column->get_cascaded_column_ids(ctxcat_ids))) { + STORAGE_LOG(WARN, "Failed to get cascaded column ids", K(ret)); + } else { + for (int64_t j = 0; OB_SUCC(ret) && j < ctxcat_ids.count(); ++j) { + const ObColumnSchemaV2 *ctxcat_column = NULL; + ObString new_col_name; + if (OB_ISNULL(ctxcat_column = table_schema.get_column_schema(ctxcat_ids.at(j)))) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "The column schema is NULL, ", K(ret)); + } else if (OB_FAIL(sql::ObSQLUtils::generate_new_name_with_escape_character( + allocator, + ctxcat_column->get_column_name_str(), + new_col_name, + is_oracle_mode))) { + SHARE_SCHEMA_LOG(WARN, "fail to generate new name with escape character", K(ret), K(ctxcat_column->get_column_name_str())); + } else if (OB_FAIL(print_identifier(buf, 
buf_len, pos, new_col_name, is_oracle_mode))) { + SHARE_SCHEMA_LOG(WARN, "fail to print column name", K(ret), K(column)); + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, + is_last && j == ctxcat_ids.count() - 1 ? ")" : ", "))) { + SHARE_SCHEMA_LOG(WARN, "fail to print column name", K(ret), K(column)); + } else { /*do nothing*/ } + } + } + return ret; +} + int ObSchemaPrinter::print_fulltext_index_column(const ObTableSchema &table_schema, const ObColumnSchemaV2 &column, bool is_last, @@ -1150,7 +1206,16 @@ int ObSchemaPrinter::print_index_column(const ObTableSchema &table_schema, if (OB_FAIL(table_schema.check_if_oracle_compat_mode(is_oracle_mode))) { LOG_WARN("fail to check oracle mode", KR(ret), K(table_schema)); } else if (column.is_hidden() && column.is_generated_column()) { //automatic generated column - if (column.is_fulltext_column()) { + if (column.is_vec_index_column()) { + if (OB_FAIL(print_vector_index_column(table_schema, + column, + is_last, + buf, + buf_len, + pos))) { + LOG_WARN("print fulltext index column failed", K(ret)); + } + } else if (column.is_fulltext_column()) { if (OB_FAIL(print_fulltext_index_column(table_schema, column, is_last, @@ -1743,6 +1808,14 @@ int ObSchemaPrinter::print_table_definition_table_options(const ObTableSchema &t SHARE_SCHEMA_LOG(WARN, "print parser name failed", K(ret), K(parser)); } } + if (OB_SUCC(ret) && table_schema.is_vec_index()) { + const ObString &vector_index_param = table_schema.get_index_params(); + if (vector_index_param.empty()) { + // skip + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "WITH (%.*s) ", vector_index_param.length(), vector_index_param.ptr()))) { + SHARE_SCHEMA_LOG(WARN, "print WITH vector index param failed", K(ret), K(vector_index_param)); + } + } if (OB_SUCCESS == ret && !is_index_tbl && !is_no_table_options(sql_mode) && !table_schema.is_external_table()) { if (OB_FAIL(print_table_definition_store_format(table_schema, buf, buf_len, pos))) { SHARE_SCHEMA_LOG(WARN, "fail to 
print store format", K(ret), K(table_schema)); @@ -1781,7 +1854,7 @@ int ObSchemaPrinter::print_table_definition_table_options(const ObTableSchema &t SHARE_SCHEMA_LOG(WARN, "fail to print block size", K(ret), K(table_schema)); } } - if (OB_SUCCESS == ret && !strict_compat_ && is_index_tbl && !table_schema.is_fts_index() + if (OB_SUCCESS == ret && !strict_compat_ && is_index_tbl && !table_schema.is_fts_index() && !table_schema.is_vec_index() && !table_schema.is_multivalue_index() && !is_no_key_options(sql_mode)) { const char* local_flag = table_schema.is_global_index_table() || table_schema.is_global_local_index_table() @@ -3957,6 +4030,8 @@ int ObSchemaPrinter::print_element_type(const uint64_t tenant_id, { int ret = OB_SUCCESS; if (elem_type_info->is_base_type()) { + // UDT base type should not be a collection, so just pass empty empty info + ObArray extend_info; int64_t type_pos = 0; char type_str[OB_MAX_SYS_PARAM_NAME_LENGTH]; bzero(type_str, OB_MAX_SYS_PARAM_NAME_LENGTH); @@ -3967,7 +4042,8 @@ int ObSchemaPrinter::print_element_type(const uint64_t tenant_id, elem_type_info->get_length(), elem_type_info->get_precision(), elem_type_info->get_scale(), - static_cast(elem_type_info->get_coll_type())), + static_cast(elem_type_info->get_coll_type()), + extend_info), elem_type_info, element_type_id); OZ (databuff_printf(buf, buf_len, pos, "%s", type_str)); } else { @@ -4258,7 +4334,8 @@ int ObSchemaPrinter::print_routine_param_type(const ObRoutineParam *param, param->get_param_type().get_length(), param->get_param_type().get_precision(), param->get_param_type().get_scale(), - param->get_param_type().get_collation_type())); + param->get_param_type().get_collation_type(), + param->get_extended_type_info())); OZ (databuff_printf(buf, buf_len, pos, " %s", type_str)); } else { ObString type_str; diff --git a/src/share/schema/ob_schema_printer.h b/src/share/schema/ob_schema_printer.h index a717030a7a..b6316b6afb 100644 --- a/src/share/schema/ob_schema_printer.h +++ 
b/src/share/schema/ob_schema_printer.h @@ -195,6 +195,12 @@ public: char *buf, int64_t buf_len, int64_t &pos) const; + int print_vector_index_column(const ObTableSchema &table_schema, + const ObColumnSchemaV2 &column, + bool is_last, + char *buf, + int64_t buf_len, + int64_t &pos) const; int print_fulltext_index_column(const ObTableSchema &table_schema, const ObColumnSchemaV2 &column, bool is_last, diff --git a/src/share/schema/ob_schema_retrieve_utils.ipp b/src/share/schema/ob_schema_retrieve_utils.ipp index 2e1a60af36..e6226f9168 100644 --- a/src/share/schema/ob_schema_retrieve_utils.ipp +++ b/src/share/schema/ob_schema_retrieve_utils.ipp @@ -1505,6 +1505,8 @@ int ObSchemaRetrieveUtils::fill_table_schema( int64_t, true, true, 0); EXTRACT_VARCHAR_FIELD_TO_CLASS_MYSQL_WITH_DEFAULT_VALUE( result, external_properties, table_schema, true/*skip null*/, true/*ignore column error*/, empty_str); + EXTRACT_VARCHAR_FIELD_TO_CLASS_MYSQL_WITH_DEFAULT_VALUE( + result, index_params, table_schema, true, ignore_column_error, ""); if (OB_SUCC(ret) && table_schema.is_materialized_view()) { bool skip_null_error = true; bool skip_column_error = true; @@ -1621,7 +1623,7 @@ int ObSchemaRetrieveUtils::fill_column_schema( } } - if (OB_SUCC(ret) && column.is_enum_or_set()) { + if (OB_SUCC(ret) && (column.is_enum_or_set() || column.is_collection())) { ObString extend_type_info; EXTRACT_VARCHAR_FIELD_MYSQL(result, "extended_type_info", extend_type_info); int64_t pos = 0; diff --git a/src/share/schema/ob_schema_service.cpp b/src/share/schema/ob_schema_service.cpp index 7ea59b53fe..b9ee37379a 100644 --- a/src/share/schema/ob_schema_service.cpp +++ b/src/share/schema/ob_schema_service.cpp @@ -543,6 +543,9 @@ int AlterTableSchema::assign(const ObTableSchema &src_schema) if (OB_SUCC(ret) && OB_FAIL(deep_copy_str(src_schema.kv_attributes_, kv_attributes_))) { LOG_WARN("Fail to deep copy ttl definition string", K(ret)); } + if (OB_SUCC(ret) && OB_FAIL(deep_copy_str(src_schema.index_params_, 
index_params_))) { + LOG_WARN("Fail to deep copy vector index param string", K(ret)); + } return ret; } diff --git a/src/share/schema/ob_schema_struct.h b/src/share/schema/ob_schema_struct.h index 0251b8b861..70f297e6fc 100755 --- a/src/share/schema/ob_schema_struct.h +++ b/src/share/schema/ob_schema_struct.h @@ -613,6 +613,15 @@ inline bool is_available_index_status(const ObIndexStatus index_status) const char *ob_index_status_str(ObIndexStatus status); +inline bool is_local_vec_index(const ObIndexType index_type) +{ + return index_type == INDEX_TYPE_VEC_ROWKEY_VID_LOCAL || + index_type == INDEX_TYPE_VEC_VID_ROWKEY_LOCAL || + index_type == INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL || + index_type == INDEX_TYPE_VEC_INDEX_ID_LOCAL || + index_type == INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL; +} + inline bool is_local_fts_index(const ObIndexType index_type) { return index_type == INDEX_TYPE_ROWKEY_DOC_ID_LOCAL || @@ -737,13 +746,19 @@ inline bool is_built_in_vec_index(const ObIndexType index_type) { return is_vec_rowkey_vid_type(index_type) || is_vec_vid_rowkey_type(index_type) || - is_vec_delta_buffer_type(index_type) || + is_vec_index_id_type(index_type) || is_vec_index_snapshot_data_type(index_type); } inline bool is_vec_index(const ObIndexType index_type) { - return is_vec_index_id_type(index_type) || is_built_in_vec_index(index_type); + return is_vec_delta_buffer_type(index_type) || is_built_in_vec_index(index_type); +} + +inline bool is_built_in_index(const ObIndexType index_type) +{ + return is_built_in_vec_index(index_type) || + is_built_in_fts_index(index_type); } inline bool is_index_local_storage(ObIndexType index_type) @@ -757,6 +772,7 @@ inline bool is_index_local_storage(ObIndexType index_type) || INDEX_TYPE_SPATIAL_LOCAL == index_type || INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE == index_type || is_local_fts_index(index_type) + || is_local_vec_index(index_type) || is_global_local_fts_index(index_type) || is_local_multivalue_index(index_type); } @@ -782,7 +798,8 
@@ inline bool index_has_tablet(const ObIndexType &index_type) || INDEX_TYPE_SPATIAL_GLOBAL == index_type || INDEX_TYPE_SPATIAL_GLOBAL_LOCAL_STORAGE == index_type || is_fts_index(index_type) - || is_multivalue_index(index_type); + || is_multivalue_index(index_type) + || is_vec_index(index_type); } struct ObTenantTableId @@ -3417,6 +3434,30 @@ int ObPartitionUtils::get_end_( return ret; } +enum class ObVectorRefreshMethod : int64_t +{ + REFRESH_COMPLETE = 0, + REFRESH_DELTA = 1, + REBUILD_COMPLETE = 2, + MAX, +}; + +enum class ObVectorIndexOrganization : int64_t +{ + IN_MEMORY_NEIGHBOR_GRAPH = 0, + NEIGHBOR_PARTITION = 1, +}; + +enum class ObVetcorIndexDistanceMetric : int64_t +{ + EUCLIDEAN = 0, + EUCLIDEAN_SQUARED = 1, + DOT = 2, + COSINE = 3, + MANHATTAN = 4, + HAMMING = 5, +}; + enum class ObMLogPurgeMode : int64_t { IMMEDIATE_SYNC = 0, diff --git a/src/share/schema/ob_schema_utils.cpp b/src/share/schema/ob_schema_utils.cpp index b93798b841..8bba9b3bd2 100644 --- a/src/share/schema/ob_schema_utils.cpp +++ b/src/share/schema/ob_schema_utils.cpp @@ -122,6 +122,18 @@ int ObSchemaUtils::cascaded_generated_column(ObTableSchema &table_schema, if (OB_FAIL(ObResolverUtils::resolve_generated_column_info(col_def, allocator, root_expr_type, columns_names))) { LOG_WARN("get generated column expr failed", K(ret)); + } else if (T_FUN_SYS_VEC_VID == root_expr_type) { + column.add_column_flag(GENERATED_VEC_VID_COLUMN_FLAG); + } else if (T_FUN_SYS_VEC_TYPE == root_expr_type) { + column.add_column_flag(GENERATED_VEC_TYPE_COLUMN_FLAG); + } else if (T_FUN_SYS_VEC_VECTOR == root_expr_type) { + column.add_column_flag(GENERATED_VEC_VECTOR_COLUMN_FLAG); + } else if (T_FUN_SYS_VEC_SCN == root_expr_type) { + column.add_column_flag(GENERATED_VEC_SCN_COLUMN_FLAG); + } else if (T_FUN_SYS_VEC_KEY == root_expr_type) { + column.add_column_flag(GENERATED_VEC_KEY_COLUMN_FLAG); + } else if (T_FUN_SYS_VEC_DATA == root_expr_type) { + column.add_column_flag(GENERATED_VEC_DATA_COLUMN_FLAG); } else if 
(T_FUN_SYS_WORD_SEGMENT == root_expr_type) { column.add_column_flag(GENERATED_FTS_WORD_SEGMENT_COLUMN_FLAG); } else if (T_FUN_SYS_WORD_COUNT == root_expr_type) { @@ -140,7 +152,6 @@ int ObSchemaUtils::cascaded_generated_column(ObTableSchema &table_schema, LOG_DEBUG("succ to resolve_generated_column_info", K(col_def), K(root_expr_type), K(columns_names), K(table_schema)); } } - // TODO: materialized view if (OB_SUCC(ret) && resolve_dependencies && !column.is_doc_id_column() && (table_schema.is_table() || table_schema.is_tmp_table())) { @@ -225,6 +236,47 @@ bool ObSchemaUtils::is_default_expr_v2_column(uint64_t flag) return flag & DEFAULT_EXPR_V2_COLUMN_FLAG; } +/* vector index */ +bool ObSchemaUtils::is_vec_index_column(const uint64_t flag) +{ + return is_vec_vid_column(flag) + || is_vec_type_column(flag) + || is_vec_vector_column(flag) + || is_vec_scn_column(flag) + || is_vec_key_column(flag) + || is_vec_data_column(flag); +} + +bool ObSchemaUtils::is_vec_vid_column(const uint64_t flag) +{ + return flag & GENERATED_VEC_VID_COLUMN_FLAG; +} + +bool ObSchemaUtils::is_vec_type_column(const uint64_t flag) +{ + return flag & GENERATED_VEC_TYPE_COLUMN_FLAG; +} + +bool ObSchemaUtils::is_vec_vector_column(const uint64_t flag) +{ + return flag & GENERATED_VEC_VECTOR_COLUMN_FLAG; +} + +bool ObSchemaUtils::is_vec_scn_column(const uint64_t flag) +{ + return flag & GENERATED_VEC_SCN_COLUMN_FLAG; +} + +bool ObSchemaUtils::is_vec_key_column(const uint64_t flag) +{ + return flag & GENERATED_VEC_KEY_COLUMN_FLAG; +} + +bool ObSchemaUtils::is_vec_data_column(const uint64_t flag) +{ + return flag & GENERATED_VEC_DATA_COLUMN_FLAG; +} + bool ObSchemaUtils::is_fulltext_column(const uint64_t flag) { return is_doc_id_column(flag) diff --git a/src/share/schema/ob_schema_utils.h b/src/share/schema/ob_schema_utils.h index f1ae1a08bf..04f5d57100 100644 --- a/src/share/schema/ob_schema_utils.h +++ b/src/share/schema/ob_schema_utils.h @@ -77,6 +77,13 @@ public: static bool 
is_invisible_column(uint64_t flag); static bool is_cte_generated_column(uint64_t flag); static bool is_default_expr_v2_column(uint64_t flag); + static bool is_vec_index_column(const uint64_t flag); + static bool is_vec_vid_column(const uint64_t flag); + static bool is_vec_type_column(const uint64_t flag); + static bool is_vec_vector_column(const uint64_t flag); + static bool is_vec_scn_column(const uint64_t flag); + static bool is_vec_key_column(const uint64_t flag); + static bool is_vec_data_column(const uint64_t flag); static bool is_fulltext_column(const uint64_t flag); static bool is_doc_id_column(const uint64_t flag); static bool is_word_segment_column(const uint64_t flag); diff --git a/src/share/schema/ob_table_dml_param.cpp b/src/share/schema/ob_table_dml_param.cpp index df97f5f53f..1ee0d76525 100644 --- a/src/share/schema/ob_table_dml_param.cpp +++ b/src/share/schema/ob_table_dml_param.cpp @@ -14,6 +14,7 @@ #include "ob_table_dml_param.h" #include "share/schema/ob_column_schema.h" #include "storage/ob_i_store.h" +#include "share/vector_index/ob_vector_index_util.h" namespace oceanbase { @@ -47,7 +48,11 @@ ObTableSchemaParam::ObTableSchemaParam(ObIAllocator &allocator) lob_inrow_threshold_(OB_DEFAULT_LOB_INROW_THRESHOLD), multivalue_col_id_(OB_INVALID_ID), multivalue_arr_col_id_(OB_INVALID_ID), - data_table_rowkey_column_num_(0) + data_table_rowkey_column_num_(0), + vec_id_col_id_(OB_INVALID_ID), + vec_index_param_(), + vec_dim_(0), + vec_vector_col_id_(OB_INVALID_ID) { } @@ -81,6 +86,10 @@ void ObTableSchemaParam::reset() multivalue_col_id_ = OB_INVALID_ID; multivalue_arr_col_id_ = OB_INVALID_ID; data_table_rowkey_column_num_ =0 ; + vec_id_col_id_ = OB_INVALID_ID; + vec_index_param_.reset(); + vec_dim_ = 0; + vec_vector_col_id_ = OB_INVALID_ID; } int ObTableSchemaParam::convert(const ObTableSchema *schema) @@ -157,6 +166,30 @@ int ObTableSchemaParam::convert(const ObTableSchema *schema) multivalue_arr_col_id_ = column_schema->get_column_id(); } } + } else 
if (schema->is_vec_delta_buffer_type() || schema->is_vec_rowkey_vid_type() || schema->is_vec_vid_rowkey_type()) { + if (schema->is_vec_delta_buffer_type()) { + if (OB_FAIL(ob_write_string(allocator_, schema->get_index_params(), vec_index_param_))) { + LOG_WARN("fail to copy vec index param", K(ret), K(schema->get_index_params())); + } else if (OB_FAIL(ObVectorIndexUtil::get_vector_index_column_dim(*schema, vec_dim_))) { + LOG_WARN("fail to get vector col dim", K(ret)); + } else if (vec_dim_ == 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get vector dim is zero, fail to calc", K(ret), K(vec_dim_), KPC(schema)); + } + } + for (int64_t i = 0; OB_SUCC(ret) && i < schema->get_column_count(); ++i) { + const ObColumnSchemaV2 *column_schema = schema->get_column_schema_by_idx(i); + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(i), KPC(schema)); + } else if (column_schema->is_vec_vid_column()) { + vec_id_col_id_ = column_schema->get_column_id(); + } else if (schema->is_vec_delta_buffer_type()) { + if (column_schema->is_vec_vector_column()) { + vec_vector_col_id_ = column_schema->get_column_id(); + } + } + } } if (OB_FAIL(ret)) { @@ -500,6 +533,14 @@ OB_DEF_SERIALIZE(ObTableSchemaParam) LOG_WARN("fail to serialize fts parser name", K(ret)); } } + OB_UNIS_ENCODE(vec_id_col_id_); + if (OB_SUCC(ret)) { + if (OB_FAIL(vec_index_param_.serialize(buf, buf_len, pos))) { + LOG_WARN("failed to serialize vec index param", K(ret)); + } + } + OB_UNIS_ENCODE(vec_dim_); + OB_UNIS_ENCODE(vec_vector_col_id_); return ret; } @@ -620,6 +661,17 @@ OB_DEF_DESERIALIZE(ObTableSchemaParam) LOG_WARN("fail to copy fts parser name", K(ret), K(tmp_name)); } } + OB_UNIS_DECODE(vec_id_col_id_); + if (OB_SUCC(ret) && pos < data_len) { + ObString tmp_vec_index_param; + if (OB_FAIL(tmp_vec_index_param.deserialize(buf, data_len, pos))) { + LOG_WARN("fail to deserialize vec index param", K(ret)); + } else if 
(OB_FAIL(ob_write_string(allocator_, tmp_vec_index_param, vec_index_param_))) { + LOG_WARN("fail to copy vec index param", K(ret), K(tmp_vec_index_param)); + } + } + OB_UNIS_DECODE(vec_dim_); + OB_UNIS_DECODE(vec_vector_col_id_); return ret; } @@ -667,6 +719,10 @@ OB_DEF_SERIALIZE_SIZE(ObTableSchemaParam) OB_UNIS_ADD_LEN(data_table_rowkey_column_num_); OB_UNIS_ADD_LEN(doc_id_col_id_); len += fts_parser_name_.get_serialize_size(); + OB_UNIS_ADD_LEN(vec_id_col_id_); + len += vec_index_param_.get_serialize_size(); + OB_UNIS_ADD_LEN(vec_dim_); + OB_UNIS_ADD_LEN(vec_vector_col_id_); return len; } diff --git a/src/share/schema/ob_table_dml_param.h b/src/share/schema/ob_table_dml_param.h index 9f5c271f1f..996471c4b4 100644 --- a/src/share/schema/ob_table_dml_param.h +++ b/src/share/schema/ob_table_dml_param.h @@ -60,6 +60,10 @@ public: OB_INLINE uint64_t get_spatial_mbr_col_id() const { return spatial_mbr_col_id_; } OB_INLINE int64_t get_multivalue_col_id() const { return multivalue_col_id_; } OB_INLINE int64_t get_multivalue_array_col_id() const { return multivalue_arr_col_id_; } + OB_INLINE int64_t get_vec_id_col_id() const { return vec_id_col_id_; } + OB_INLINE int64_t get_vec_vector_col_id() const { return vec_vector_col_id_; } + OB_INLINE ObString get_vec_index_param() const { return vec_index_param_; } + OB_INLINE int64_t get_vec_dim() const { return vec_dim_; } OB_INLINE int64_t get_lob_inrow_threshold() const { return lob_inrow_threshold_; } OB_INLINE int64_t get_column_count() const { return columns_.count(); } OB_INLINE const Columns &get_columns() const { return columns_; } @@ -81,7 +85,11 @@ public: OB_INLINE bool is_fts_index_aux() const { return share::schema::is_fts_index_aux(index_type_); } OB_INLINE bool is_multivalue_index() const { return share::schema::is_multivalue_index(index_type_); } OB_INLINE bool is_multivalue_index_aux() const { return share::schema::is_multivalue_index_aux(index_type_); } + OB_INLINE bool is_vector_delta_buffer() const { return 
share::schema::is_vec_delta_buffer_type(index_type_); } + OB_INLINE bool is_vector_index_id() const { return share::schema::is_vec_index_id_type(index_type_); } + OB_INLINE bool is_vector_index_snapshot() const { return share::schema::is_vec_index_snapshot_data_type(index_type_); } OB_INLINE bool is_index_local_storage() const { return share::schema::is_index_local_storage(index_type_); } + OB_INLINE bool is_vector_index() const { return share::schema::is_vec_index(index_type_); } int is_rowkey_column(const uint64_t column_id, bool &is_rowkey) const; int is_column_nullable_for_write(const uint64_t column_id, bool &is_nullable_for_write) const; @@ -132,6 +140,10 @@ private: uint64_t multivalue_col_id_; uint64_t multivalue_arr_col_id_; int64_t data_table_rowkey_column_num_; + uint64_t vec_id_col_id_; + ObString vec_index_param_; + int64_t vec_dim_; + uint64_t vec_vector_col_id_; }; class ObTableDMLParam diff --git a/src/share/schema/ob_table_param.cpp b/src/share/schema/ob_table_param.cpp index b05f367365..59ad1ffc51 100644 --- a/src/share/schema/ob_table_param.cpp +++ b/src/share/schema/ob_table_param.cpp @@ -628,7 +628,8 @@ ObTableParam::ObTableParam(ObIAllocator &allocator) is_spatial_index_(false), is_fts_index_(false), is_multivalue_index_(false), - is_column_replica_table_(false) + is_column_replica_table_(false), + is_vec_index_(false) { reset(); } @@ -658,6 +659,7 @@ void ObTableParam::reset() is_fts_index_ = false; is_multivalue_index_ = false; is_column_replica_table_ = false; + is_vec_index_ = false; } OB_DEF_SERIALIZE(ObTableParam) @@ -699,6 +701,9 @@ OB_DEF_SERIALIZE(ObTableParam) if (OB_SUCC(ret)) { OB_UNIS_ENCODE(is_column_replica_table_); } + if (OB_SUCC(ret)) { + OB_UNIS_ENCODE(is_vec_index_); + } return ret; } @@ -787,6 +792,10 @@ OB_DEF_DESERIALIZE(ObTableParam) LST_DO_CODE(OB_UNIS_DECODE, is_column_replica_table_); } + if (OB_SUCC(ret)) { + LST_DO_CODE(OB_UNIS_DECODE, + is_vec_index_); + } return ret; } @@ -831,6 +840,10 @@ 
OB_DEF_SERIALIZE_SIZE(ObTableParam) LST_DO_CODE(OB_UNIS_ADD_LEN, is_column_replica_table_); } + if (OB_SUCC(ret)) { + LST_DO_CODE(OB_UNIS_ADD_LEN, + is_vec_index_); + } return len; } @@ -1548,6 +1561,7 @@ int64_t ObTableParam::to_string(char *buf, const int64_t buf_len) const K_(enable_lob_locator_v2), K_(is_fts_index), K_(parser_name), + K_(is_vec_index), K_(is_column_replica_table)); J_OBJ_END(); diff --git a/src/share/schema/ob_table_param.h b/src/share/schema/ob_table_param.h index 9a5378f816..eef4e54d32 100644 --- a/src/share/schema/ob_table_param.h +++ b/src/share/schema/ob_table_param.h @@ -312,6 +312,8 @@ public: inline void set_is_fts_index(const bool is_fts_index) { is_fts_index_ = is_fts_index; } inline int64_t is_multivalue_index() const { return is_multivalue_index_; } inline void set_is_multivalue_index(bool is_multivalue_index) { is_multivalue_index_ = is_multivalue_index; } + inline bool is_vec_index() const { return is_vec_index_; } + inline void set_is_vec_index(const bool is_vec_index) { is_vec_index_ = is_vec_index; } inline bool use_lob_locator() const { return use_lob_locator_; } inline bool enable_lob_locator_v2() const { return enable_lob_locator_v2_; } inline bool &get_enable_lob_locator_v2() { return enable_lob_locator_v2_; } @@ -408,6 +410,7 @@ private: bool is_fts_index_; bool is_multivalue_index_; bool is_column_replica_table_; + bool is_vec_index_; }; } //namespace schema } //namespace share diff --git a/src/share/schema/ob_table_schema.cpp b/src/share/schema/ob_table_schema.cpp index 6e61286a68..9838c6ffa1 100644 --- a/src/share/schema/ob_table_schema.cpp +++ b/src/share/schema/ob_table_schema.cpp @@ -562,7 +562,7 @@ bool ObSimpleTableSchemaV2::is_valid() const ret = false; LOG_WARN("invalid data table_id", K(ret), K(data_table_id_)); } else if (is_index_table() && !is_normal_index() && !is_unique_index() - && !is_domain_index() && !is_fts_index() && !is_multivalue_index()) { + && !is_domain_index() && !is_vec_index() && 
!is_fts_index() && !is_multivalue_index()) { ret = false; LOG_WARN("table_type is not consistent with index_type", "table_type", static_cast(table_type_), @@ -4632,6 +4632,13 @@ int ObTableSchema::check_alter_column_type(const ObColumnSchemaV2 &src_column, LOG_USER_ERROR(OB_NOT_SUPPORTED, err_msg); } } + } else if (src_column.is_collection()) { + bool is_same = false; + if (OB_FAIL(src_column.is_same_collection_column(dst_column, is_same))) { + LOG_WARN("failed to check whether is same collection cols", K(ret)); + } else { + is_offline = !is_same; + } } return ret; } @@ -4877,7 +4884,7 @@ int ObTableSchema::check_alter_column_in_index(const ObColumnSchemaV2 &src_colum } } if (OB_SUCC(ret) && is_in_index) { - if (ob_is_text_tc(dst_column.get_data_type())) { + if (!index_table_schema->is_vec_index() && ob_is_text_tc(dst_column.get_data_type())) { ret = OB_ERR_WRONG_KEY_COLUMN; LOG_USER_ERROR(OB_ERR_WRONG_KEY_COLUMN, dst_column.get_column_name_str().length(), dst_column.get_column_name_str().ptr()); @@ -4980,6 +4987,10 @@ int ObTableSchema::check_is_exactly_same_type(const ObColumnSchemaV2 &src_column is_same = true; } } + } else if (src_column.is_collection()) { + if (OB_FAIL(src_column.is_same_collection_column(dst_column, is_same))) { + LOG_WARN("failed to check whether is same collection cols", K(ret)); + } } else { if (src_column.is_string_type() || src_column.is_raw() || ob_is_rowid_tc(src_column.get_data_type()) @@ -8388,6 +8399,44 @@ int ObTableSchema::get_fulltext_column_ids(uint64_t &doc_id_col_id, uint64_t &ft return ret; } +int ObTableSchema::get_vec_index_column_id(uint64_t &vec_vector_id) const +{ + int ret = OB_SUCCESS; + vec_vector_id = OB_INVALID_ID; + for (int64_t i = 0; OB_SUCC(ret) && OB_INVALID_ID == vec_vector_id && i < get_column_count(); ++i) { + const ObColumnSchemaV2 *column_schema = get_column_schema_by_idx(i); + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(i), 
KPC(this)); + } else if (column_schema->is_vec_vector_column()) { + vec_vector_id = column_schema->get_column_id(); + } + } + if (OB_FAIL(ret) || OB_INVALID_ID == vec_vector_id) { + ret = ret != OB_SUCCESS ? ret : OB_ERR_INDEX_KEY_NOT_FOUND; + } + return ret; +} + +int ObTableSchema::get_vec_index_vid_col_id(uint64_t &vec_id_col_id) const +{ + int ret = OB_SUCCESS; + vec_id_col_id = OB_INVALID_ID; + for (int64_t i = 0; OB_SUCC(ret) && OB_INVALID_ID == vec_id_col_id && i < get_column_count(); ++i) { + const ObColumnSchemaV2 *column_schema = get_column_schema_by_idx(i); + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(i), KPC(this)); + } else if (column_schema->is_vec_vid_column()) { + vec_id_col_id = column_schema->get_column_id(); + } + } + if (OB_FAIL(ret) || OB_INVALID_ID == vec_id_col_id) { + ret = ret != OB_SUCCESS ? ret : OB_ERR_INDEX_KEY_NOT_FOUND; + } + return ret; +} + int ObTableSchema::get_rowkey_doc_tid(uint64_t &index_table_id) const { int ret = OB_SUCCESS; @@ -8409,6 +8458,27 @@ int ObTableSchema::get_rowkey_doc_tid(uint64_t &index_table_id) const return ret; } +int ObTableSchema::get_rowkey_vid_tid(uint64_t &index_table_id) const +{ + int ret = OB_SUCCESS; + ObSEArray simple_index_infos; + index_table_id = OB_INVALID_ID; + if (OB_FAIL(get_simple_index_infos(simple_index_infos))) { + LOG_WARN("get simple_index_infos failed", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < simple_index_infos.count(); ++i) { + if (share::schema::is_vec_rowkey_vid_type(simple_index_infos.at(i).index_type_)) { + index_table_id = simple_index_infos.at(i).table_id_; + break; + } + } + if (OB_SUCC(ret) && OB_UNLIKELY(OB_INVALID_ID == index_table_id)) { + ret = OB_ERR_INDEX_KEY_NOT_FOUND; + LOG_DEBUG("not found rowkey vid index", K(ret), K(simple_index_infos)); + } + return ret; +} + int ObTableSchema::check_has_local_index(ObSchemaGetterGuard &schema_guard, bool &has_local_index) const { int 
ret = OB_SUCCESS; @@ -8483,6 +8553,30 @@ int ObTableSchema::check_has_multivalue_index(ObSchemaGetterGuard &schema_guard, return ret; } +int ObTableSchema::check_has_vector_index(ObSchemaGetterGuard &schema_guard, bool &has_vector_index) const +{ + int ret = OB_SUCCESS; + ObSEArray simple_index_infos; + const ObSimpleTableSchemaV2 *index_schema = NULL; + const uint64_t tenant_id = get_tenant_id(); + has_vector_index = false; + if (OB_FAIL(get_simple_index_infos(simple_index_infos))) { + LOG_WARN("get simple_index_infos failed", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < simple_index_infos.count(); ++i) { + if (OB_FAIL(schema_guard.get_simple_table_schema(tenant_id, simple_index_infos.at(i).table_id_, index_schema))) { + LOG_WARN("failed to get table schema", K(ret), K(tenant_id), K(simple_index_infos.at(i).table_id_)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("cannot get index table schema for table ", K(simple_index_infos.at(i).table_id_)); + } else if (index_schema->is_vec_index()) { + has_vector_index = true; + break; + } + } + return ret; +} + int ObTableSchema::get_spatial_index_column_ids(common::ObIArray &column_ids) const { // spatial index is a kind of domain index @@ -9237,6 +9331,22 @@ int ObTableSchema::get_doc_id_rowkey_tid(uint64_t &doc_id_rowkey_tid) const return ret; } +int ObTableSchema::get_vec_id_rowkey_tid(uint64_t &vec_id_rowkey_tid) const +{ + int ret = OB_SUCCESS; + vec_id_rowkey_tid = OB_INVALID_ID; + for (int64_t i = 0; OB_SUCC(ret) && i < simple_index_infos_.count(); ++i) { + if (share::schema::is_vec_vid_rowkey_type(simple_index_infos_.at(i).index_type_)) { + vec_id_rowkey_tid = simple_index_infos_.at(i).table_id_; + break; + } + } + if (OB_INVALID_ID == vec_id_rowkey_tid) { + ret = OB_ERR_FT_COLUMN_NOT_INDEXED; + } + return ret; +} + int64_t ObPrintableTableSchema::to_string(char *buf, const int64_t buf_len) const { int64_t pos = 0; diff --git a/src/share/schema/ob_table_schema.h 
b/src/share/schema/ob_table_schema.h index 506809a90a..bc93c02eae 100644 --- a/src/share/schema/ob_table_schema.h +++ b/src/share/schema/ob_table_schema.h @@ -990,8 +990,17 @@ public: inline bool is_multivalue_index_aux() const; inline bool is_spatial_index() const; inline static bool is_spatial_index(const ObIndexType index_type); + inline bool is_vec_index() const; + inline static bool is_vec_index(const ObIndexType index_type); + inline bool is_built_in_vec_index() const; + inline bool is_vec_rowkey_vid_type() const; + inline bool is_vec_vid_rowkey_type() const; + inline bool is_vec_delta_buffer_type() const; + inline bool is_vec_index_id_type() const; + inline bool is_vec_index_snapshot_data_type() const; inline bool is_fts_index() const; inline bool is_built_in_fts_index() const; + inline bool is_built_in_index() const; // fts / vector index inline bool is_rowkey_doc_id() const; inline bool is_doc_id_rowkey() const; inline bool is_fts_index_aux() const; @@ -1493,7 +1502,8 @@ public: int get_spatial_geo_column_id(uint64_t &geo_column_id) const; int get_spatial_index_column_ids(common::ObIArray &column_ids) const; int get_fulltext_column_ids(uint64_t &doc_id_col_id, uint64_t &ft_col_id) const; - + int get_vec_index_column_id(uint64_t &vec_vector_id) const; + int get_vec_index_vid_col_id(uint64_t &vec_id_col_id) const; // get columns for building rowid int get_column_ids_serialize_to_rowid(common::ObIArray &col_ids, int64_t &rowkey_cnt) const; @@ -1754,6 +1764,7 @@ public: int check_has_local_index(ObSchemaGetterGuard &schema_guard, bool &has_local_index) const; int check_has_fts_index(ObSchemaGetterGuard &schema_guard, bool &has_fts_index) const; int check_has_multivalue_index(ObSchemaGetterGuard &schema_guard, bool &has_multivalue_index) const; + int check_has_vector_index(ObSchemaGetterGuard &schema_guard, bool &has_vector_index) const; int is_real_unique_index_column(ObSchemaGetterGuard &schema_guard, uint64_t column_id, bool &is_uni) const; @@ -1764,9 
+1775,11 @@ public: uint64_t column_id, bool &is_mul) const; int get_doc_id_rowkey_tid(uint64_t &doc_id_rowkey_tid) const; + int get_vec_id_rowkey_tid(uint64_t &doc_id_rowkey_tid) const; void set_aux_lob_meta_tid(const uint64_t& table_id) { aux_lob_meta_tid_ = table_id; } void set_aux_lob_piece_tid(const uint64_t& table_id) { aux_lob_piece_tid_ = table_id; } int get_rowkey_doc_tid(uint64_t &index_table_id) const; + int get_rowkey_vid_tid(uint64_t &index_table_id) const; uint64_t get_aux_lob_meta_tid() const { return aux_lob_meta_tid_; } uint64_t get_aux_lob_piece_tid() const { return aux_lob_piece_tid_; } bool has_lob_column() const; @@ -2090,6 +2103,46 @@ inline bool ObSimpleTableSchemaV2::is_multivalue_index_aux() const return share::schema::is_multivalue_index_aux(index_type_); } +inline bool ObSimpleTableSchemaV2::is_vec_index() const +{ + return share::schema::is_vec_index(index_type_); +} + +inline bool ObSimpleTableSchemaV2::is_vec_index(const ObIndexType index_type) +{ + return share::schema::is_vec_index(index_type); +} + +inline bool ObSimpleTableSchemaV2::is_built_in_vec_index() const +{ + return share::schema::is_built_in_vec_index(index_type_); +} + +inline bool ObSimpleTableSchemaV2::is_vec_rowkey_vid_type() const +{ + return share::schema::is_vec_rowkey_vid_type(index_type_); +} + +inline bool ObSimpleTableSchemaV2::is_vec_vid_rowkey_type() const +{ + return share::schema::is_vec_vid_rowkey_type(index_type_); +} + +inline bool ObSimpleTableSchemaV2::is_vec_delta_buffer_type() const +{ + return share::schema::is_vec_delta_buffer_type(index_type_); +} + +inline bool ObSimpleTableSchemaV2::is_vec_index_id_type() const +{ + return share::schema::is_vec_index_id_type(index_type_); +} + +inline bool ObSimpleTableSchemaV2::is_vec_index_snapshot_data_type() const +{ + return share::schema::is_vec_index_snapshot_data_type(index_type_); +} + inline bool ObSimpleTableSchemaV2::is_fts_index() const { return share::schema::is_fts_index(index_type_); @@ -2100,6 
+2153,11 @@ inline bool ObSimpleTableSchemaV2::is_built_in_fts_index() const return share::schema::is_built_in_fts_index(index_type_); } +inline bool ObSimpleTableSchemaV2::is_built_in_index() const +{ + return share::schema::is_built_in_index(index_type_); +} + inline bool ObSimpleTableSchemaV2::is_rowkey_doc_id() const { return share::schema::is_rowkey_doc_aux(index_type_); diff --git a/src/share/schema/ob_table_sql_service.cpp b/src/share/schema/ob_table_sql_service.cpp index ea60525aa5..7b7295604f 100644 --- a/src/share/schema/ob_table_sql_service.cpp +++ b/src/share/schema/ob_table_sql_service.cpp @@ -34,6 +34,8 @@ #include "observer/omt/ob_tenant_timezone_mgr.h" #include "sql/ob_sql_utils.h" #include "share/ob_time_utility2.h" +#include "share/ob_vec_index_builder_util.h" + namespace oceanbase { using namespace common; @@ -1958,6 +1960,10 @@ int ObTableSqlService::add_table( uint64_t tenant_data_version = 0; if (OB_FAIL(GET_MIN_DATA_VERSION(exec_tenant_id, tenant_data_version))) { LOG_WARN("get tenant data version failed", K(ret)); + } else if (tenant_data_version < DATA_VERSION_4_3_3_0 && table.is_vec_index()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.3, vector index is not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.3, vector index"); } else if (tenant_data_version < DATA_VERSION_4_1_0_0 && table.is_spatial_index()) { ret = OB_NOT_SUPPORTED; LOG_WARN("tenant data version is less than 4.1, spatial index is not supported", K(ret), K(tenant_data_version)); @@ -2454,7 +2460,6 @@ int ObTableSqlService::create_table(ObTableSchema &table, int64_t end_usec = 0; int64_t cost_usec = 0; const uint64_t tenant_id = table.get_tenant_id(); - if (!table.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid create table argument, ", K(table)); @@ -3000,6 +3005,10 @@ int ObTableSqlService::gen_table_dml( && OB_UNLIKELY(0 != table.get_auto_increment_cache_size())) 
{ ret = OB_NOT_SUPPORTED; LOG_WARN("auto increment cache size not support before 4.2.3", K(ret), K(table)); + } else if (data_version < DATA_VERSION_4_3_3_0 + && !table.get_index_params().empty()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("index params setting is not support in version less than 433"); } else { if (data_version < DATA_VERSION_4_2_1_0 && (!table.get_ttl_definition().empty() || !table.get_kv_attributes().empty())) { @@ -3012,6 +3021,7 @@ int ObTableSqlService::gen_table_dml( LOG_WARN(QUEUING_MODE_NOT_COMPAT_WARN_STR, K(ret), K(table)); } else {} if (OB_SUCC(ret)) { + ObString empty_str(""); const ObPartitionOption &part_option = table.get_part_option(); const ObPartitionOption &sub_part_option = table.get_sub_part_option(); const char *expire_info = table.get_expire_info().length() <= 0 ? @@ -3031,6 +3041,7 @@ int ObTableSqlService::gen_table_dml( "" : table.get_ttl_definition().ptr(); const char *kv_attributes = table.get_kv_attributes().empty() ? "" : table.get_kv_attributes().ptr(); + ObString index_params = table.get_index_params().empty() ? 
empty_str : table.get_index_params(); ObString local_session_var; ObArenaAllocator allocator(ObModIds::OB_SCHEMA_OB_SCHEMA_ARENA); if (OB_FAIL(check_table_options(table))) { @@ -3155,6 +3166,8 @@ int ObTableSqlService::gen_table_dml( && OB_FAIL(dml.add_column("auto_increment_cache_size", table.get_auto_increment_cache_size()))) || (data_version >= DATA_VERSION_4_3_2_1 && OB_FAIL(dml.add_column("external_properties", ObHexEscapeSqlStr(table.get_external_properties())))) + || (data_version >= DATA_VERSION_4_3_3_0 + && OB_FAIL(dml.add_column("index_params", ObHexEscapeSqlStr(index_params)))) || (data_version >= DATA_VERSION_4_3_3_0 && OB_FAIL(dml.add_column("local_session_vars", ObHexEscapeSqlStr(local_session_var)))) ) { @@ -3197,8 +3210,12 @@ int ObTableSqlService::gen_table_options_dml( && (table.get_auto_increment_cache_size() != 0)) { ret = OB_NOT_SUPPORTED; LOG_WARN("table auto_increment_cache_size not support before 4.2.3", K(ret), K(table)); + } else if (data_version < DATA_VERSION_4_3_3_0 && (!table.get_index_params().empty())) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("index params setting is not support before 433", K(ret), K(table)); } else {} if (OB_SUCC(ret)) { + ObString empty_str(""); const ObPartitionOption &part_option = table.get_part_option(); const ObPartitionOption &sub_part_option = table.get_sub_part_option(); const char *table_name = table.get_table_name_str().length() <= 0 ? @@ -3218,6 +3235,7 @@ int ObTableSqlService::gen_table_options_dml( "" : table.get_ttl_definition().ptr(); const char *kv_attributes = table.get_kv_attributes().length() <= 0 ? "" : table.get_kv_attributes().ptr(); + ObString index_params = table.get_index_params().empty() ? 
empty_str : table.get_index_params(); if (OB_FAIL(check_table_options(table))) { LOG_WARN("fail to check table option", K(ret), K(table)); @@ -3307,6 +3325,8 @@ int ObTableSqlService::gen_table_options_dml( && OB_FAIL(dml.add_column("column_store", table.is_column_store_supported()))) || ((data_version >= DATA_VERSION_4_3_2_0 || (data_version < DATA_VERSION_4_3_0_0 && data_version >= MOCK_DATA_VERSION_4_2_3_0)) && OB_FAIL(dml.add_column("auto_increment_cache_size", table.get_auto_increment_cache_size()))) + || (data_version >= DATA_VERSION_4_3_3_0 + && OB_FAIL(dml.add_column("index_params", ObHexEscapeSqlStr(index_params)))) ) { LOG_WARN("add column failed", K(ret)); } @@ -4229,6 +4249,10 @@ int ObTableSqlService::gen_column_dml( } else if (OB_FAIL(ObCompatModeGetter::get_table_compat_mode( column.get_tenant_id(), column.get_table_id(), compat_mode))) { LOG_WARN("fail to get tenant mode", K(ret), K(column)); + } else if (tenant_data_version < DATA_VERSION_4_3_3_0 && column.is_collection()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.3, array type is not supported", K(ret), K(tenant_data_version), K(column)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.3, array"); } else if ((tenant_data_version < DATA_VERSION_4_2_2_0 || (tenant_data_version >= DATA_VERSION_4_3_0_0 && tenant_data_version < DATA_VERSION_4_3_1_0)) && column.is_geometry() && compat_mode ==lib::Worker::CompatMode::ORACLE) { @@ -4332,7 +4356,7 @@ int ObTableSqlService::gen_column_dml( cur_default_value.reset(); } ObString bin_extended_type_info; - if (OB_SUCC(ret) && column.is_enum_or_set()) { + if (OB_SUCC(ret) && (column.is_enum_or_set() || column.is_collection())) { int64_t pos = 0; extended_type_info_buf = static_cast(allocator.alloc(OB_MAX_VARBINARY_LENGTH)); if (OB_ISNULL(extended_type_info_buf)) { diff --git a/src/share/system_variable/ob_sys_var_class_type.h b/src/share/system_variable/ob_sys_var_class_type.h index 
45489c69da..c90d16ab95 100644 --- a/src/share/system_variable/ob_sys_var_class_type.h +++ b/src/share/system_variable/ob_sys_var_class_type.h @@ -519,6 +519,7 @@ enum ObSysVarClassType SYS_VAR_GROUP_REPLICATION_GROUP_SEEDS = 10416, SYS_VAR_SLAVE_ROWS_SEARCH_ALGORITHMS = 10417, SYS_VAR_SLAVE_TYPE_CONVERSIONS = 10418, + SYS_VAR_OB_HNSW_EF_SEARCH = 10419, SYS_VAR_DELAY_KEY_WRITE = 10632, SYS_VAR_INNODB_LARGE_PREFIX = 10633, SYS_VAR_KEY_BUFFER_SIZE = 10634, diff --git a/src/share/system_variable/ob_system_variable_alias.h b/src/share/system_variable/ob_system_variable_alias.h index 1524bf679c..46d9c9ffbe 100644 --- a/src/share/system_variable/ob_system_variable_alias.h +++ b/src/share/system_variable/ob_system_variable_alias.h @@ -514,6 +514,7 @@ namespace share static const char* const OB_SV_GROUP_REPLICATION_GROUP_SEEDS = "group_replication_group_seeds"; static const char* const OB_SV_SLAVE_ROWS_SEARCH_ALGORITHMS = "slave_rows_search_algorithms"; static const char* const OB_SV_SLAVE_TYPE_CONVERSIONS = "slave_type_conversions"; + static const char* const OB_SV_HNSW_EF_SEARCH = "ob_hnsw_ef_search"; static const char* const OB_SV_DELAY_KEY_WRITE = "delay_key_write"; static const char* const OB_SV_INNODB_LARGE_PREFIX = "innodb_large_prefix"; static const char* const OB_SV_KEY_BUFFER_SIZE = "key_buffer_size"; diff --git a/src/share/system_variable/ob_system_variable_factory.cpp b/src/share/system_variable/ob_system_variable_factory.cpp index 765430bcda..84269a86f9 100644 --- a/src/share/system_variable/ob_system_variable_factory.cpp +++ b/src/share/system_variable/ob_system_variable_factory.cpp @@ -911,6 +911,7 @@ const char *ObSysVarFactory::SYS_VAR_NAMES_SORTED_BY_NAME[] = { "ob_enable_transmission_checksum", "ob_enable_truncate_flashback", "ob_global_debug_sync", + "ob_hnsw_ef_search", "ob_interm_result_mem_limit", "ob_kv_mode", "ob_last_schema_version", @@ -1518,6 +1519,7 @@ const ObSysVarClassType ObSysVarFactory::SYS_VAR_IDS_SORTED_BY_NAME[] = { 
SYS_VAR_OB_ENABLE_TRANSMISSION_CHECKSUM, SYS_VAR_OB_ENABLE_TRUNCATE_FLASHBACK, SYS_VAR_OB_GLOBAL_DEBUG_SYNC, + SYS_VAR_OB_HNSW_EF_SEARCH, SYS_VAR_OB_INTERM_RESULT_MEM_LIMIT, SYS_VAR_OB_KV_MODE, SYS_VAR_OB_LAST_SCHEMA_VERSION, @@ -2222,6 +2224,7 @@ const char *ObSysVarFactory::SYS_VAR_NAMES_SORTED_BY_ID[] = { "group_replication_group_seeds", "slave_rows_search_algorithms", "slave_type_conversions", + "ob_hnsw_ef_search", "delay_key_write", "innodb_large_prefix", "key_buffer_size", @@ -3030,6 +3033,7 @@ int ObSysVarFactory::create_all_sys_vars() + sizeof(ObSysVarGroupReplicationGroupSeeds) + sizeof(ObSysVarSlaveRowsSearchAlgorithms) + sizeof(ObSysVarSlaveTypeConversions) + + sizeof(ObSysVarObHnswEfSearch) + sizeof(ObSysVarDelayKeyWrite) + sizeof(ObSysVarInnodbLargePrefix) + sizeof(ObSysVarKeyBufferSize) @@ -7626,6 +7630,15 @@ int ObSysVarFactory::create_all_sys_vars() ptr = (void *)((char *)ptr + sizeof(ObSysVarSlaveTypeConversions)); } } + if (OB_SUCC(ret)) { + if (OB_ISNULL(sys_var_ptr = new (ptr)ObSysVarObHnswEfSearch())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_ERROR("fail to new ObSysVarObHnswEfSearch", K(ret)); + } else { + store_buf_[ObSysVarsToIdxMap::get_store_idx(static_cast(SYS_VAR_OB_HNSW_EF_SEARCH))] = sys_var_ptr; + ptr = (void *)((char *)ptr + sizeof(ObSysVarObHnswEfSearch)); + } + } if (OB_SUCC(ret)) { if (OB_ISNULL(sys_var_ptr = new (ptr)ObSysVarDelayKeyWrite())) { ret = OB_ALLOCATE_MEMORY_FAILED; @@ -14068,6 +14081,17 @@ int ObSysVarFactory::create_sys_var(ObIAllocator &allocator_, ObSysVarClassType } break; } + case SYS_VAR_OB_HNSW_EF_SEARCH: { + void *ptr = NULL; + if (OB_ISNULL(ptr = allocator_.alloc(sizeof(ObSysVarObHnswEfSearch)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_ERROR("fail to alloc memory", K(ret), K(sizeof(ObSysVarObHnswEfSearch))); + } else if (OB_ISNULL(sys_var_ptr = new (ptr)ObSysVarObHnswEfSearch())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_ERROR("fail to new ObSysVarObHnswEfSearch", K(ret)); + } + break; + } case 
SYS_VAR_DELAY_KEY_WRITE: { void *ptr = NULL; if (OB_ISNULL(ptr = allocator_.alloc(sizeof(ObSysVarDelayKeyWrite)))) { diff --git a/src/share/system_variable/ob_system_variable_factory.h b/src/share/system_variable/ob_system_variable_factory.h index f4a7c15d3b..717d3ff5c5 100644 --- a/src/share/system_variable/ob_system_variable_factory.h +++ b/src/share/system_variable/ob_system_variable_factory.h @@ -3649,6 +3649,13 @@ public: inline virtual ObSysVarClassType get_type() const { return SYS_VAR_SLAVE_TYPE_CONVERSIONS; } inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(497); } }; +class ObSysVarObHnswEfSearch : public ObIntSysVar +{ +public: + ObSysVarObHnswEfSearch() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} + inline virtual ObSysVarClassType get_type() const { return SYS_VAR_OB_HNSW_EF_SEARCH; } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(498); } +}; class ObSysVarDelayKeyWrite : public ObEnumSysVar { public: @@ -3656,7 +3663,7 @@ public: public: ObSysVarDelayKeyWrite() : ObEnumSysVar(DELAY_KEY_WRITE_NAMES, NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_DELAY_KEY_WRITE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(498); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(499); } }; class ObSysVarInnodbLargePrefix : public ObEnumSysVar { @@ -3665,35 +3672,35 @@ public: public: ObSysVarInnodbLargePrefix() : ObEnumSysVar(INNODB_LARGE_PREFIX_NAMES, NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_INNODB_LARGE_PREFIX; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(499); } + inline virtual const common::ObObj 
&get_global_default_value() const { return ObSysVariables::get_default_value(500); } }; class ObSysVarKeyBufferSize : public ObIntSysVar { public: ObSysVarKeyBufferSize() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_KEY_BUFFER_SIZE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(500); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(501); } }; class ObSysVarKeyCacheAgeThreshold : public ObIntSysVar { public: ObSysVarKeyCacheAgeThreshold() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_KEY_CACHE_AGE_THRESHOLD; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(501); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(502); } }; class ObSysVarKeyCacheDivisionLimit : public ObIntSysVar { public: ObSysVarKeyCacheDivisionLimit() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_KEY_CACHE_DIVISION_LIMIT; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(502); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(503); } }; class ObSysVarMaxSeeksForKey : public ObIntSysVar { public: ObSysVarMaxSeeksForKey() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_MAX_SEEKS_FOR_KEY; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(503); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(504); } }; class 
ObSysVarOldAlterTable : public ObEnumSysVar { @@ -3702,28 +3709,28 @@ public: public: ObSysVarOldAlterTable() : ObEnumSysVar(OLD_ALTER_TABLE_NAMES, NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_OLD_ALTER_TABLE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(504); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(505); } }; class ObSysVarTableDefinitionCache : public ObIntSysVar { public: ObSysVarTableDefinitionCache() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_TABLE_DEFINITION_CACHE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(505); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(506); } }; class ObSysVarInnodbSortBufferSize : public ObIntSysVar { public: ObSysVarInnodbSortBufferSize() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_INNODB_SORT_BUFFER_SIZE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(506); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(507); } }; class ObSysVarKeyCacheBlockSize : public ObIntSysVar { public: ObSysVarKeyCacheBlockSize() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_KEY_CACHE_BLOCK_SIZE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(507); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(508); } }; class ObSysVarObKvMode : public 
ObEnumSysVar { @@ -3732,350 +3739,350 @@ public: public: ObSysVarObKvMode() : ObEnumSysVar(OB_KV_MODE_NAMES, NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_OB_KV_MODE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(508); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(509); } }; class ObSysVarObEnableParameterAnonymousBlock : public ObBoolSysVar { public: ObSysVarObEnableParameterAnonymousBlock() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_OB_ENABLE_PARAMETER_ANONYMOUS_BLOCK; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(509); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(510); } }; class ObSysVarCharacterSetsDir : public ObVarcharSysVar { public: ObSysVarCharacterSetsDir() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_CHARACTER_SETS_DIR; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(510); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(511); } }; class ObSysVarDateFormat : public ObVarcharSysVar { public: ObSysVarDateFormat() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_DATE_FORMAT; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(511); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(512); } }; class ObSysVarDatetimeFormat : public ObVarcharSysVar { public: 
ObSysVarDatetimeFormat() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_DATETIME_FORMAT; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(512); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(513); } }; class ObSysVarDisconnectOnExpiredPassword : public ObBoolSysVar { public: ObSysVarDisconnectOnExpiredPassword() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_DISCONNECT_ON_EXPIRED_PASSWORD; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(513); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(514); } }; class ObSysVarExternalUser : public ObVarcharSysVar { public: ObSysVarExternalUser() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_EXTERNAL_USER; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(514); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(515); } }; class ObSysVarHaveCrypt : public ObVarcharSysVar { public: ObSysVarHaveCrypt() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_HAVE_CRYPT; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(515); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(516); } }; class ObSysVarHaveDynamicLoading : public ObVarcharSysVar { public: ObSysVarHaveDynamicLoading() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline 
virtual ObSysVarClassType get_type() const { return SYS_VAR_HAVE_DYNAMIC_LOADING; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(516); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(517); } }; class ObSysVarKeyringAwsConfFile : public ObVarcharSysVar { public: ObSysVarKeyringAwsConfFile() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_KEYRING_AWS_CONF_FILE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(517); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(518); } }; class ObSysVarKeyringAwsDataFile : public ObVarcharSysVar { public: ObSysVarKeyringAwsDataFile() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_KEYRING_AWS_DATA_FILE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(518); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(519); } }; class ObSysVarLanguage : public ObVarcharSysVar { public: ObSysVarLanguage() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_LANGUAGE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(519); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(520); } }; class ObSysVarLcMessagesDir : public ObVarcharSysVar { public: ObSysVarLcMessagesDir() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_LC_MESSAGES_DIR; } - inline virtual 
const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(520); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(521); } }; class ObSysVarLowerCaseFileSystem : public ObBoolSysVar { public: ObSysVarLowerCaseFileSystem() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_LOWER_CASE_FILE_SYSTEM; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(521); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(522); } }; class ObSysVarMaxDigestLength : public ObIntSysVar { public: ObSysVarMaxDigestLength() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_MAX_DIGEST_LENGTH; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(522); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(523); } }; class ObSysVarNdbinfoDatabase : public ObVarcharSysVar { public: ObSysVarNdbinfoDatabase() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDBINFO_DATABASE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(523); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(524); } }; class ObSysVarNdbinfoTablePrefix : public ObVarcharSysVar { public: ObSysVarNdbinfoTablePrefix() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDBINFO_TABLE_PREFIX; } - inline virtual const common::ObObj &get_global_default_value() const { return 
ObSysVariables::get_default_value(524); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(525); } }; class ObSysVarNdbinfoVersion : public ObVarcharSysVar { public: ObSysVarNdbinfoVersion() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDBINFO_VERSION; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(525); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(526); } }; class ObSysVarNdbBatchSize : public ObIntSysVar { public: ObSysVarNdbBatchSize() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_BATCH_SIZE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(526); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(527); } }; class ObSysVarNdbClusterConnectionPool : public ObIntSysVar { public: ObSysVarNdbClusterConnectionPool() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_CLUSTER_CONNECTION_POOL; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(527); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(528); } }; class ObSysVarNdbClusterConnectionPoolNodeids : public ObVarcharSysVar { public: ObSysVarNdbClusterConnectionPoolNodeids() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_CLUSTER_CONNECTION_POOL_NODEIDS; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(528); } 
+ inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(529); } }; class ObSysVarNdbLogApplyStatus : public ObBoolSysVar { public: ObSysVarNdbLogApplyStatus() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_LOG_APPLY_STATUS; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(529); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(530); } }; class ObSysVarNdbLogBin : public ObBoolSysVar { public: ObSysVarNdbLogBin() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_LOG_BIN; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(530); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(531); } }; class ObSysVarNdbLogFailTerminate : public ObBoolSysVar { public: ObSysVarNdbLogFailTerminate() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_LOG_FAIL_TERMINATE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(531); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(532); } }; class ObSysVarNdbLogOrig : public ObBoolSysVar { public: ObSysVarNdbLogOrig() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_LOG_ORIG; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(532); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(533); } }; 
class ObSysVarNdbLogTransactionId : public ObBoolSysVar { public: ObSysVarNdbLogTransactionId() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_LOG_TRANSACTION_ID; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(533); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(534); } }; class ObSysVarNdbOptimizedNodeSelection : public ObIntSysVar { public: ObSysVarNdbOptimizedNodeSelection() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_OPTIMIZED_NODE_SELECTION; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(534); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(535); } }; class ObSysVarNdbSystemName : public ObVarcharSysVar { public: ObSysVarNdbSystemName() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_SYSTEM_NAME; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(535); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(536); } }; class ObSysVarNdbUseCopyingAlterTable : public ObBoolSysVar { public: ObSysVarNdbUseCopyingAlterTable() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_USE_COPYING_ALTER_TABLE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(536); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(537); } }; class ObSysVarNdbVersionString : public 
ObVarcharSysVar { public: ObSysVarNdbVersionString() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_VERSION_STRING; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(537); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(538); } }; class ObSysVarNdbWaitConnected : public ObIntSysVar { public: ObSysVarNdbWaitConnected() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_WAIT_CONNECTED; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(538); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(539); } }; class ObSysVarNdbWaitSetup : public ObIntSysVar { public: ObSysVarNdbWaitSetup() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_WAIT_SETUP; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(539); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(540); } }; class ObSysVarProxyUser : public ObVarcharSysVar { public: ObSysVarProxyUser() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_PROXY_USER; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(540); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(541); } }; class ObSysVarSha256PasswordAutoGenerateRsaKeys : public ObBoolSysVar { public: ObSysVarSha256PasswordAutoGenerateRsaKeys() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) 
{} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_SHA256_PASSWORD_AUTO_GENERATE_RSA_KEYS; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(541); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(542); } }; class ObSysVarSha256PasswordPrivateKeyPath : public ObVarcharSysVar { public: ObSysVarSha256PasswordPrivateKeyPath() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_SHA256_PASSWORD_PRIVATE_KEY_PATH; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(542); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(543); } }; class ObSysVarSha256PasswordPublicKeyPath : public ObVarcharSysVar { public: ObSysVarSha256PasswordPublicKeyPath() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_SHA256_PASSWORD_PUBLIC_KEY_PATH; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(543); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(544); } }; class ObSysVarSkipShowDatabase : public ObVarcharSysVar { public: ObSysVarSkipShowDatabase() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_SKIP_SHOW_DATABASE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(544); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(545); } }; class ObSysVarPluginLoad : public ObVarcharSysVar { public: ObSysVarPluginLoad() : ObVarcharSysVar(NULL, NULL, NULL, NULL, 
NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_PLUGIN_LOAD; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(545); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(546); } }; class ObSysVarPluginLoadAdd : public ObVarcharSysVar { public: ObSysVarPluginLoadAdd() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_PLUGIN_LOAD_ADD; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(546); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(547); } }; class ObSysVarBigTables : public ObBoolSysVar { public: ObSysVarBigTables() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_BIG_TABLES; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(547); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(548); } }; class ObSysVarCheckProxyUsers : public ObBoolSysVar { public: ObSysVarCheckProxyUsers() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_CHECK_PROXY_USERS; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(548); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(549); } }; class ObSysVarConnectionControlFailedConnectionsThreshold : public ObIntSysVar { public: ObSysVarConnectionControlFailedConnectionsThreshold() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return 
SYS_VAR_CONNECTION_CONTROL_FAILED_CONNECTIONS_THRESHOLD; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(549); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(550); } }; class ObSysVarConnectionControlMaxConnectionDelay : public ObIntSysVar { public: ObSysVarConnectionControlMaxConnectionDelay() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_CONNECTION_CONTROL_MAX_CONNECTION_DELAY; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(550); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(551); } }; class ObSysVarConnectionControlMinConnectionDelay : public ObIntSysVar { public: ObSysVarConnectionControlMinConnectionDelay() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_CONNECTION_CONTROL_MIN_CONNECTION_DELAY; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(551); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(552); } }; class ObSysVarDefaultWeekFormat : public ObIntSysVar { public: ObSysVarDefaultWeekFormat() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_DEFAULT_WEEK_FORMAT; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(552); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(553); } }; class ObSysVarDelayedInsertTimeout : public ObIntSysVar { public: ObSysVarDelayedInsertTimeout() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline 
virtual ObSysVarClassType get_type() const { return SYS_VAR_DELAYED_INSERT_TIMEOUT; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(553); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(554); } }; class ObSysVarDelayedQueueSize : public ObIntSysVar { public: ObSysVarDelayedQueueSize() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_DELAYED_QUEUE_SIZE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(554); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(555); } }; class ObSysVarEqRangeIndexDiveLimit : public ObIntSysVar { public: ObSysVarEqRangeIndexDiveLimit() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_EQ_RANGE_INDEX_DIVE_LIMIT; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(555); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(556); } }; class ObSysVarInnodbStatsAutoRecalc : public ObBoolSysVar { public: ObSysVarInnodbStatsAutoRecalc() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_INNODB_STATS_AUTO_RECALC; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(556); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(557); } }; class ObSysVarInnodbStatsIncludeDeleteMarked : public ObBoolSysVar { public: ObSysVarInnodbStatsIncludeDeleteMarked() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const 
{ return SYS_VAR_INNODB_STATS_INCLUDE_DELETE_MARKED; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(557); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(558); } }; class ObSysVarInnodbStatsMethod : public ObEnumSysVar { @@ -4084,49 +4091,49 @@ public: public: ObSysVarInnodbStatsMethod() : ObEnumSysVar(INNODB_STATS_METHOD_NAMES, NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_INNODB_STATS_METHOD; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(558); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(559); } }; class ObSysVarInnodbStatsOnMetadata : public ObBoolSysVar { public: ObSysVarInnodbStatsOnMetadata() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_INNODB_STATS_ON_METADATA; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(559); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(560); } }; class ObSysVarVersionTokensSession : public ObVarcharSysVar { public: ObSysVarVersionTokensSession() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_VERSION_TOKENS_SESSION; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(560); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(561); } }; class ObSysVarInnodbStatsPersistentSamplePages : public ObIntSysVar { public: ObSysVarInnodbStatsPersistentSamplePages() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline 
virtual ObSysVarClassType get_type() const { return SYS_VAR_INNODB_STATS_PERSISTENT_SAMPLE_PAGES; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(561); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(562); } }; class ObSysVarInnodbStatsSamplePages : public ObIntSysVar { public: ObSysVarInnodbStatsSamplePages() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_INNODB_STATS_SAMPLE_PAGES; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(562); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(563); } }; class ObSysVarInnodbStatsTransientSamplePages : public ObIntSysVar { public: ObSysVarInnodbStatsTransientSamplePages() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_INNODB_STATS_TRANSIENT_SAMPLE_PAGES; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(563); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(564); } }; class ObSysVarKeyringAwsCmkId : public ObVarcharSysVar { public: ObSysVarKeyringAwsCmkId() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_KEYRING_AWS_CMK_ID; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(564); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(565); } }; class ObSysVarKeyringAwsRegion : public ObEnumSysVar { @@ -4135,91 +4142,91 @@ public: public: ObSysVarKeyringAwsRegion() : ObEnumSysVar(KEYRING_AWS_REGION_NAMES, 
NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_KEYRING_AWS_REGION; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(565); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(566); } }; class ObSysVarKeyringEncryptedFileData : public ObVarcharSysVar { public: ObSysVarKeyringEncryptedFileData() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_KEYRING_ENCRYPTED_FILE_DATA; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(566); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(567); } }; class ObSysVarKeyringEncryptedFilePassword : public ObVarcharSysVar { public: ObSysVarKeyringEncryptedFilePassword() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_KEYRING_ENCRYPTED_FILE_PASSWORD; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(567); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(568); } }; class ObSysVarKeyringFileData : public ObVarcharSysVar { public: ObSysVarKeyringFileData() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_KEYRING_FILE_DATA; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(568); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(569); } }; class ObSysVarKeyringOkvConfDir : public ObVarcharSysVar { public: ObSysVarKeyringOkvConfDir() : ObVarcharSysVar(NULL, NULL, NULL, 
NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_KEYRING_OKV_CONF_DIR; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(569); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(570); } }; class ObSysVarKeyringOperations : public ObBoolSysVar { public: ObSysVarKeyringOperations() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_KEYRING_OPERATIONS; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(570); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(571); } }; class ObSysVarOptimizerSwitch : public ObVarcharSysVar { public: ObSysVarOptimizerSwitch() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_OPTIMIZER_SWITCH; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(571); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(572); } }; class ObSysVarMaxConnectErrors : public ObIntSysVar { public: ObSysVarMaxConnectErrors() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_MAX_CONNECT_ERRORS; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(572); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(573); } }; class ObSysVarMysqlFirewallMode : public ObBoolSysVar { public: ObSysVarMysqlFirewallMode() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return 
SYS_VAR_MYSQL_FIREWALL_MODE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(573); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(574); } }; class ObSysVarMysqlFirewallTrace : public ObBoolSysVar { public: ObSysVarMysqlFirewallTrace() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_MYSQL_FIREWALL_TRACE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(574); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(575); } }; class ObSysVarMysqlNativePasswordProxyUsers : public ObBoolSysVar { public: ObSysVarMysqlNativePasswordProxyUsers() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_MYSQL_NATIVE_PASSWORD_PROXY_USERS; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(575); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(576); } }; class ObSysVarNetRetryCount : public ObIntSysVar { public: ObSysVarNetRetryCount() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NET_RETRY_COUNT; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(576); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(577); } }; class ObSysVarNew : public ObBoolSysVar { public: ObSysVarNew() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NEW; } - inline virtual const common::ObObj &get_global_default_value() const { 
return ObSysVariables::get_default_value(577); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(578); } }; class ObSysVarOldPasswords : public ObEnumSysVar { @@ -4228,147 +4235,147 @@ public: public: ObSysVarOldPasswords() : ObEnumSysVar(OLD_PASSWORDS_NAMES, NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_OLD_PASSWORDS; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(578); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(579); } }; class ObSysVarOptimizerPruneLevel : public ObIntSysVar { public: ObSysVarOptimizerPruneLevel() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_OPTIMIZER_PRUNE_LEVEL; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(579); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(580); } }; class ObSysVarOptimizerSearchDepth : public ObIntSysVar { public: ObSysVarOptimizerSearchDepth() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_OPTIMIZER_SEARCH_DEPTH; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(580); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(581); } }; class ObSysVarOptimizerTrace : public ObVarcharSysVar { public: ObSysVarOptimizerTrace() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_OPTIMIZER_TRACE; } - inline virtual const common::ObObj &get_global_default_value() const { return 
ObSysVariables::get_default_value(581); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(582); } }; class ObSysVarOptimizerTraceFeatures : public ObVarcharSysVar { public: ObSysVarOptimizerTraceFeatures() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_OPTIMIZER_TRACE_FEATURES; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(582); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(583); } }; class ObSysVarOptimizerTraceLimit : public ObIntSysVar { public: ObSysVarOptimizerTraceLimit() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_OPTIMIZER_TRACE_LIMIT; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(583); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(584); } }; class ObSysVarOptimizerTraceMaxMemSize : public ObIntSysVar { public: ObSysVarOptimizerTraceMaxMemSize() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_OPTIMIZER_TRACE_MAX_MEM_SIZE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(584); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(585); } }; class ObSysVarOptimizerTraceOffset : public ObIntSysVar { public: ObSysVarOptimizerTraceOffset() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_OPTIMIZER_TRACE_OFFSET; } - inline virtual const common::ObObj &get_global_default_value() const { return 
ObSysVariables::get_default_value(585); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(586); } }; class ObSysVarParserMaxMemSize : public ObIntSysVar { public: ObSysVarParserMaxMemSize() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_PARSER_MAX_MEM_SIZE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(586); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(587); } }; class ObSysVarRandSeed1 : public ObIntSysVar { public: ObSysVarRandSeed1() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_RAND_SEED1; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(587); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(588); } }; class ObSysVarRandSeed2 : public ObIntSysVar { public: ObSysVarRandSeed2() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_RAND_SEED2; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(588); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(589); } }; class ObSysVarRangeAllocBlockSize : public ObIntSysVar { public: ObSysVarRangeAllocBlockSize() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_RANGE_ALLOC_BLOCK_SIZE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(589); } + inline virtual const common::ObObj &get_global_default_value() const { return 
ObSysVariables::get_default_value(590); } }; class ObSysVarRangeOptimizerMaxMemSize : public ObIntSysVar { public: ObSysVarRangeOptimizerMaxMemSize() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_RANGE_OPTIMIZER_MAX_MEM_SIZE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(590); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(591); } }; class ObSysVarRewriterEnabled : public ObBoolSysVar { public: ObSysVarRewriterEnabled() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_REWRITER_ENABLED; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(591); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(592); } }; class ObSysVarRewriterVerbose : public ObIntSysVar { public: ObSysVarRewriterVerbose() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_REWRITER_VERBOSE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(592); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(593); } }; class ObSysVarSecureAuth : public ObBoolSysVar { public: ObSysVarSecureAuth() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_SECURE_AUTH; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(593); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(594); } }; class ObSysVarSha256PasswordProxyUsers : public ObBoolSysVar { 
public: ObSysVarSha256PasswordProxyUsers() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_SHA256_PASSWORD_PROXY_USERS; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(594); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(595); } }; class ObSysVarShowCompatibility56 : public ObBoolSysVar { public: ObSysVarShowCompatibility56() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_SHOW_COMPATIBILITY_56; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(595); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(596); } }; class ObSysVarShowCreateTableVerbosity : public ObBoolSysVar { public: ObSysVarShowCreateTableVerbosity() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_SHOW_CREATE_TABLE_VERBOSITY; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(596); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(597); } }; class ObSysVarShowOldTemporals : public ObBoolSysVar { public: ObSysVarShowOldTemporals() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_SHOW_OLD_TEMPORALS; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(597); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(598); } }; class ObSysVarSqlBigSelects : public ObBoolSysVar { public: ObSysVarSqlBigSelects() : ObBoolSysVar(NULL, 
NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_SQL_BIG_SELECTS; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(598); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(599); } }; class ObSysVarUpdatableViewsWithLimit : public ObEnumSysVar { @@ -4377,35 +4384,35 @@ public: public: ObSysVarUpdatableViewsWithLimit() : ObEnumSysVar(UPDATABLE_VIEWS_WITH_LIMIT_NAMES, NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_UPDATABLE_VIEWS_WITH_LIMIT; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(599); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(600); } }; class ObSysVarValidatePasswordDictionaryFile : public ObVarcharSysVar { public: ObSysVarValidatePasswordDictionaryFile() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_VALIDATE_PASSWORD_DICTIONARY_FILE; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(600); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(601); } }; class ObSysVarDelayedInsertLimit : public ObIntSysVar { public: ObSysVarDelayedInsertLimit() : ObIntSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_DELAYED_INSERT_LIMIT; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(601); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(602); } }; class ObSysVarNdbVersion : public ObVarcharSysVar { public: 
ObSysVarNdbVersion() : ObVarcharSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_NDB_VERSION; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(602); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(603); } }; class ObSysVarAutoGenerateCerts : public ObBoolSysVar { public: ObSysVarAutoGenerateCerts() : ObBoolSysVar(NULL, NULL, NULL, NULL, NULL) {} inline virtual ObSysVarClassType get_type() const { return SYS_VAR_AUTO_GENERATE_CERTS; } - inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(603); } + inline virtual const common::ObObj &get_global_default_value() const { return ObSysVariables::get_default_value(604); } }; @@ -4430,7 +4437,7 @@ private: public: const static int64_t MYSQL_SYS_VARS_COUNT = 99; - const static int64_t OB_SYS_VARS_COUNT = 505; + const static int64_t OB_SYS_VARS_COUNT = 506; const static int64_t ALL_SYS_VARS_COUNT = MYSQL_SYS_VARS_COUNT + OB_SYS_VARS_COUNT; const static int64_t INVALID_MAX_READ_STALE_TIME = -1; diff --git a/src/share/system_variable/ob_system_variable_init.cpp b/src/share/system_variable/ob_system_variable_init.cpp index 085bec9777..78b0054362 100644 --- a/src/share/system_variable/ob_system_variable_init.cpp +++ b/src/share/system_variable/ob_system_variable_init.cpp @@ -6961,1453 +6961,1468 @@ static struct VarsInit{ }(); [&] (){ - ObSysVars[498].default_value_ = "0" ; - ObSysVars[498].info_ = "This variable specifies how to use delayed key writes. It applies only to MyISAM tables. 
Delayed key writing causes key buffers not to be flushed between writes, merely simulates MySQL 5.7" ; - ObSysVars[498].name_ = "delay_key_write" ; - ObSysVars[498].data_type_ = ObIntType ; - ObSysVars[498].enum_names_ = "[u'ON', u'OFF', u'ALL']" ; - ObSysVars[498].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[498].id_ = SYS_VAR_DELAY_KEY_WRITE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_DELAY_KEY_WRITE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_DELAY_KEY_WRITE] = 498 ; - ObSysVars[498].base_value_ = "0" ; - ObSysVars[498].alias_ = "OB_SV_DELAY_KEY_WRITE" ; + ObSysVars[498].default_value_ = "64" ; + ObSysVars[498].info_ = "The number of neighbor nodes considered during any HNSW vector index search on the session" ; + ObSysVars[498].name_ = "ob_hnsw_ef_search" ; + ObSysVars[498].data_type_ = ObUInt64Type ; + ObSysVars[498].min_val_ = "1" ; + ObSysVars[498].max_val_ = "1000" ; + ObSysVars[498].flags_ = ObSysVarFlag::SESSION_SCOPE ; + ObSysVars[498].id_ = SYS_VAR_OB_HNSW_EF_SEARCH ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OB_HNSW_EF_SEARCH)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_OB_HNSW_EF_SEARCH] = 498 ; + ObSysVars[498].base_value_ = "40" ; + ObSysVars[498].alias_ = "OB_SV_HNSW_EF_SEARCH" ; }(); [&] (){ ObSysVars[499].default_value_ = "0" ; - ObSysVars[499].info_ = "When this option is enabled, index key prefixes longer than 767 bytes (up to 3072 bytes) are allowed for InnoDB tables that use DYNAMIC or COMPRESSED row format, merely simulates MySQL 5.7" ; - ObSysVars[499].name_ = "innodb_large_prefix" ; + ObSysVars[499].info_ = "This variable specifies how to use delayed key writes. It applies only to MyISAM tables. 
Delayed key writing causes key buffers not to be flushed between writes, merely simulates MySQL 5.7" ; + ObSysVars[499].name_ = "delay_key_write" ; ObSysVars[499].data_type_ = ObIntType ; - ObSysVars[499].enum_names_ = "[u'ON', u'OFF']" ; + ObSysVars[499].enum_names_ = "[u'ON', u'OFF', u'ALL']" ; ObSysVars[499].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[499].id_ = SYS_VAR_INNODB_LARGE_PREFIX ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_LARGE_PREFIX)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_LARGE_PREFIX] = 499 ; + ObSysVars[499].id_ = SYS_VAR_DELAY_KEY_WRITE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_DELAY_KEY_WRITE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_DELAY_KEY_WRITE] = 499 ; ObSysVars[499].base_value_ = "0" ; - ObSysVars[499].alias_ = "OB_SV_INNODB_LARGE_PREFIX" ; + ObSysVars[499].alias_ = "OB_SV_DELAY_KEY_WRITE" ; }(); [&] (){ - ObSysVars[500].default_value_ = "8388608" ; - ObSysVars[500].info_ = "The size of the buffer used for index blocks, merely simulates MySQL 5.7" ; - ObSysVars[500].name_ = "key_buffer_size" ; + ObSysVars[500].default_value_ = "0" ; + ObSysVars[500].info_ = "When this option is enabled, index key prefixes longer than 767 bytes (up to 3072 bytes) are allowed for InnoDB tables that use DYNAMIC or COMPRESSED row format, merely simulates MySQL 5.7" ; + ObSysVars[500].name_ = "innodb_large_prefix" ; ObSysVars[500].data_type_ = ObIntType ; - ObSysVars[500].min_val_ = "0" ; - ObSysVars[500].max_val_ = "4294967295" ; + ObSysVars[500].enum_names_ = "[u'ON', u'OFF']" ; ObSysVars[500].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[500].id_ = SYS_VAR_KEY_BUFFER_SIZE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEY_BUFFER_SIZE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_KEY_BUFFER_SIZE] = 500 ; - ObSysVars[500].base_value_ = "8388608" ; - ObSysVars[500].alias_ = "OB_SV_KEY_BUFFER_SIZE" ; + ObSysVars[500].id_ = SYS_VAR_INNODB_LARGE_PREFIX ; + 
cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_LARGE_PREFIX)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_LARGE_PREFIX] = 500 ; + ObSysVars[500].base_value_ = "0" ; + ObSysVars[500].alias_ = "OB_SV_INNODB_LARGE_PREFIX" ; }(); [&] (){ - ObSysVars[501].default_value_ = "300" ; - ObSysVars[501].info_ = "This value controls the demotion of buffers from the hot sublist of a key cache to the warm sublist. Lower values cause demotion to happen more quickly, merely simulates MySQL 5.7" ; - ObSysVars[501].name_ = "key_cache_age_threshold" ; - ObSysVars[501].data_type_ = ObUInt64Type ; - ObSysVars[501].min_val_ = "100" ; - ObSysVars[501].max_val_ = "18446744073709551516" ; + ObSysVars[501].default_value_ = "8388608" ; + ObSysVars[501].info_ = "The size of the buffer used for index blocks, merely simulates MySQL 5.7" ; + ObSysVars[501].name_ = "key_buffer_size" ; + ObSysVars[501].data_type_ = ObIntType ; + ObSysVars[501].min_val_ = "0" ; + ObSysVars[501].max_val_ = "4294967295" ; ObSysVars[501].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[501].id_ = SYS_VAR_KEY_CACHE_AGE_THRESHOLD ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEY_CACHE_AGE_THRESHOLD)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_KEY_CACHE_AGE_THRESHOLD] = 501 ; - ObSysVars[501].base_value_ = "300" ; - ObSysVars[501].alias_ = "OB_SV_KEY_CACHE_AGE_THRESHOLD" ; + ObSysVars[501].id_ = SYS_VAR_KEY_BUFFER_SIZE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEY_BUFFER_SIZE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_KEY_BUFFER_SIZE] = 501 ; + ObSysVars[501].base_value_ = "8388608" ; + ObSysVars[501].alias_ = "OB_SV_KEY_BUFFER_SIZE" ; }(); [&] (){ - ObSysVars[502].default_value_ = "100" ; - ObSysVars[502].info_ = "The division point between the hot and warm sublists of the key cache buffer list, merely simulates MySQL 5.7" ; - ObSysVars[502].name_ = "key_cache_division_limit" ; - ObSysVars[502].data_type_ = ObIntType ; - ObSysVars[502].min_val_ = "1" ; - 
ObSysVars[502].max_val_ = "100" ; + ObSysVars[502].default_value_ = "300" ; + ObSysVars[502].info_ = "This value controls the demotion of buffers from the hot sublist of a key cache to the warm sublist. Lower values cause demotion to happen more quickly, merely simulates MySQL 5.7" ; + ObSysVars[502].name_ = "key_cache_age_threshold" ; + ObSysVars[502].data_type_ = ObUInt64Type ; + ObSysVars[502].min_val_ = "100" ; + ObSysVars[502].max_val_ = "18446744073709551516" ; ObSysVars[502].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[502].id_ = SYS_VAR_KEY_CACHE_DIVISION_LIMIT ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEY_CACHE_DIVISION_LIMIT)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_KEY_CACHE_DIVISION_LIMIT] = 502 ; - ObSysVars[502].base_value_ = "100" ; - ObSysVars[502].alias_ = "OB_SV_KEY_CACHE_DIVISION_LIMIT" ; + ObSysVars[502].id_ = SYS_VAR_KEY_CACHE_AGE_THRESHOLD ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEY_CACHE_AGE_THRESHOLD)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_KEY_CACHE_AGE_THRESHOLD] = 502 ; + ObSysVars[502].base_value_ = "300" ; + ObSysVars[502].alias_ = "OB_SV_KEY_CACHE_AGE_THRESHOLD" ; }(); [&] (){ - ObSysVars[503].default_value_ = "18446744073709551615" ; - ObSysVars[503].info_ = "Limit the assumed maximum number of seeks when looking up rows based on a key, merely simulates MySQL 5.7" ; - ObSysVars[503].name_ = "max_seeks_for_key" ; - ObSysVars[503].data_type_ = ObUInt64Type ; + ObSysVars[503].default_value_ = "100" ; + ObSysVars[503].info_ = "The division point between the hot and warm sublists of the key cache buffer list, merely simulates MySQL 5.7" ; + ObSysVars[503].name_ = "key_cache_division_limit" ; + ObSysVars[503].data_type_ = ObIntType ; ObSysVars[503].min_val_ = "1" ; - ObSysVars[503].max_val_ = "18446744073709551615" ; - ObSysVars[503].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[503].id_ = SYS_VAR_MAX_SEEKS_FOR_KEY ; - 
cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_MAX_SEEKS_FOR_KEY)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_MAX_SEEKS_FOR_KEY] = 503 ; - ObSysVars[503].base_value_ = "18446744073709551615" ; - ObSysVars[503].alias_ = "OB_SV_MAX_SEEKS_FOR_KEY" ; + ObSysVars[503].max_val_ = "100" ; + ObSysVars[503].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[503].id_ = SYS_VAR_KEY_CACHE_DIVISION_LIMIT ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEY_CACHE_DIVISION_LIMIT)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_KEY_CACHE_DIVISION_LIMIT] = 503 ; + ObSysVars[503].base_value_ = "100" ; + ObSysVars[503].alias_ = "OB_SV_KEY_CACHE_DIVISION_LIMIT" ; }(); [&] (){ - ObSysVars[504].default_value_ = "0" ; - ObSysVars[504].info_ = "When this variable is enabled, the server does not use the optimized method of processing an ALTER TABLE operation, merely simulates MySQL 5.7" ; - ObSysVars[504].name_ = "old_alter_table" ; - ObSysVars[504].data_type_ = ObIntType ; - ObSysVars[504].enum_names_ = "[u'OFF', u'ON']" ; + ObSysVars[504].default_value_ = "18446744073709551615" ; + ObSysVars[504].info_ = "Limit the assumed maximum number of seeks when looking up rows based on a key, merely simulates MySQL 5.7" ; + ObSysVars[504].name_ = "max_seeks_for_key" ; + ObSysVars[504].data_type_ = ObUInt64Type ; + ObSysVars[504].min_val_ = "1" ; + ObSysVars[504].max_val_ = "18446744073709551615" ; ObSysVars[504].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[504].id_ = SYS_VAR_OLD_ALTER_TABLE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OLD_ALTER_TABLE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_OLD_ALTER_TABLE] = 504 ; - ObSysVars[504].base_value_ = "0" ; - ObSysVars[504].alias_ = "OB_SV_OLD_ALTER_TABLE" ; + ObSysVars[504].id_ = SYS_VAR_MAX_SEEKS_FOR_KEY ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_MAX_SEEKS_FOR_KEY)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_MAX_SEEKS_FOR_KEY] = 504 ; + 
ObSysVars[504].base_value_ = "18446744073709551615" ; + ObSysVars[504].alias_ = "OB_SV_MAX_SEEKS_FOR_KEY" ; }(); [&] (){ - ObSysVars[505].default_value_ = "-1" ; - ObSysVars[505].info_ = "The number of table definitions that can be stored in the table definition cache, merely simulates MySQL 5.7" ; - ObSysVars[505].name_ = "table_definition_cache" ; + ObSysVars[505].default_value_ = "0" ; + ObSysVars[505].info_ = "When this variable is enabled, the server does not use the optimized method of processing an ALTER TABLE operation, merely simulates MySQL 5.7" ; + ObSysVars[505].name_ = "old_alter_table" ; ObSysVars[505].data_type_ = ObIntType ; - ObSysVars[505].min_val_ = "400" ; - ObSysVars[505].max_val_ = "524288" ; + ObSysVars[505].enum_names_ = "[u'OFF', u'ON']" ; ObSysVars[505].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[505].id_ = SYS_VAR_TABLE_DEFINITION_CACHE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_TABLE_DEFINITION_CACHE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_TABLE_DEFINITION_CACHE] = 505 ; - ObSysVars[505].base_value_ = "-1" ; - ObSysVars[505].alias_ = "OB_SV_TABLE_DEFINITION_CACHE" ; + ObSysVars[505].id_ = SYS_VAR_OLD_ALTER_TABLE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OLD_ALTER_TABLE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_OLD_ALTER_TABLE] = 505 ; + ObSysVars[505].base_value_ = "0" ; + ObSysVars[505].alias_ = "OB_SV_OLD_ALTER_TABLE" ; }(); [&] (){ - ObSysVars[506].default_value_ = "1048576" ; - ObSysVars[506].info_ = "The sort buffer size for online DDL operations that create or rebuild secondary indexes, merely simulates MySQL 5.7" ; - ObSysVars[506].name_ = "innodb_sort_buffer_size" ; + ObSysVars[506].default_value_ = "-1" ; + ObSysVars[506].info_ = "The number of table definitions that can be stored in the table definition cache, merely simulates MySQL 5.7" ; + ObSysVars[506].name_ = "table_definition_cache" ; ObSysVars[506].data_type_ = ObIntType ; - 
ObSysVars[506].min_val_ = "65536" ; - ObSysVars[506].max_val_ = "67108864" ; - ObSysVars[506].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[506].id_ = SYS_VAR_INNODB_SORT_BUFFER_SIZE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_SORT_BUFFER_SIZE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_SORT_BUFFER_SIZE] = 506 ; - ObSysVars[506].base_value_ = "1048576" ; - ObSysVars[506].alias_ = "OB_SV_INNODB_SORT_BUFFER_SIZE" ; + ObSysVars[506].min_val_ = "400" ; + ObSysVars[506].max_val_ = "524288" ; + ObSysVars[506].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[506].id_ = SYS_VAR_TABLE_DEFINITION_CACHE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_TABLE_DEFINITION_CACHE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_TABLE_DEFINITION_CACHE] = 506 ; + ObSysVars[506].base_value_ = "-1" ; + ObSysVars[506].alias_ = "OB_SV_TABLE_DEFINITION_CACHE" ; }(); [&] (){ - ObSysVars[507].default_value_ = "1024" ; - ObSysVars[507].info_ = "The size in bytes of blocks in the key cache, merely simulates MySQL 5.7" ; - ObSysVars[507].name_ = "key_cache_block_size" ; + ObSysVars[507].default_value_ = "1048576" ; + ObSysVars[507].info_ = "The sort buffer size for online DDL operations that create or rebuild secondary indexes, merely simulates MySQL 5.7" ; + ObSysVars[507].name_ = "innodb_sort_buffer_size" ; ObSysVars[507].data_type_ = ObIntType ; - ObSysVars[507].min_val_ = "512" ; - ObSysVars[507].max_val_ = "16384" ; - ObSysVars[507].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[507].id_ = SYS_VAR_KEY_CACHE_BLOCK_SIZE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEY_CACHE_BLOCK_SIZE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_KEY_CACHE_BLOCK_SIZE] = 507 ; - ObSysVars[507].base_value_ = "1024" ; - ObSysVars[507].alias_ = "OB_SV_KEY_CACHE_BLOCK_SIZE" ; + ObSysVars[507].min_val_ = "65536" ; + ObSysVars[507].max_val_ = 
"67108864" ; + ObSysVars[507].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[507].id_ = SYS_VAR_INNODB_SORT_BUFFER_SIZE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_SORT_BUFFER_SIZE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_SORT_BUFFER_SIZE] = 507 ; + ObSysVars[507].base_value_ = "1048576" ; + ObSysVars[507].alias_ = "OB_SV_INNODB_SORT_BUFFER_SIZE" ; }(); [&] (){ - ObSysVars[508].default_value_ = "0" ; - ObSysVars[508].info_ = "Use this variable to select the interface mode for the OBKV tenant. You can select one of 'ALL, TABLEAPI, HBASE, REDIS, NONE', where 'ALL' is the default and 'NONE' represents the non-OBKV interface mode." ; - ObSysVars[508].name_ = "ob_kv_mode" ; + ObSysVars[508].default_value_ = "1024" ; + ObSysVars[508].info_ = "The size in bytes of blocks in the key cache, merely simulates MySQL 5.7" ; + ObSysVars[508].name_ = "key_cache_block_size" ; ObSysVars[508].data_type_ = ObIntType ; - ObSysVars[508].enum_names_ = "[u'ALL', u'TABLEAPI', u'HBASE', u'REDIS', u'NONE']" ; - ObSysVars[508].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::READONLY ; - ObSysVars[508].id_ = SYS_VAR_OB_KV_MODE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OB_KV_MODE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_OB_KV_MODE] = 508 ; - ObSysVars[508].base_value_ = "0" ; - ObSysVars[508].alias_ = "OB_SV_KV_MODE" ; + ObSysVars[508].min_val_ = "512" ; + ObSysVars[508].max_val_ = "16384" ; + ObSysVars[508].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[508].id_ = SYS_VAR_KEY_CACHE_BLOCK_SIZE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEY_CACHE_BLOCK_SIZE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_KEY_CACHE_BLOCK_SIZE] = 508 ; + ObSysVars[508].base_value_ = "1024" ; + ObSysVars[508].alias_ = "OB_SV_KEY_CACHE_BLOCK_SIZE" ; }(); [&] (){ - ObSysVars[509].default_value_ = "1" ; - ObSysVars[509].info_ = "wether use parameter anonymous_block" ; - 
ObSysVars[509].name_ = "ob_enable_parameter_anonymous_block" ; + ObSysVars[509].default_value_ = "0" ; + ObSysVars[509].info_ = "Use this variable to select the interface mode for the OBKV tenant. You can select one of 'ALL, TABLEAPI, HBASE, REDIS, NONE', where 'ALL' is the default and 'NONE' represents the non-OBKV interface mode." ; + ObSysVars[509].name_ = "ob_kv_mode" ; ObSysVars[509].data_type_ = ObIntType ; - ObSysVars[509].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::NEED_SERIALIZE ; - ObSysVars[509].id_ = SYS_VAR_OB_ENABLE_PARAMETER_ANONYMOUS_BLOCK ; + ObSysVars[509].enum_names_ = "[u'ALL', u'TABLEAPI', u'HBASE', u'REDIS', u'NONE']" ; + ObSysVars[509].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::READONLY ; + ObSysVars[509].id_ = SYS_VAR_OB_KV_MODE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OB_KV_MODE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_OB_KV_MODE] = 509 ; + ObSysVars[509].base_value_ = "0" ; + ObSysVars[509].alias_ = "OB_SV_KV_MODE" ; + }(); + + [&] (){ + ObSysVars[510].default_value_ = "1" ; + ObSysVars[510].info_ = "wether use parameter anonymous_block" ; + ObSysVars[510].name_ = "ob_enable_parameter_anonymous_block" ; + ObSysVars[510].data_type_ = ObIntType ; + ObSysVars[510].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::NEED_SERIALIZE ; + ObSysVars[510].id_ = SYS_VAR_OB_ENABLE_PARAMETER_ANONYMOUS_BLOCK ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OB_ENABLE_PARAMETER_ANONYMOUS_BLOCK)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_OB_ENABLE_PARAMETER_ANONYMOUS_BLOCK] = 509 ; - ObSysVars[509].base_value_ = "1" ; - ObSysVars[509].alias_ = "OB_SV_ENABLE_PARAMETER_ANONYMOUS_BLOCK" ; + ObSysVarsIdToArrayIdx[SYS_VAR_OB_ENABLE_PARAMETER_ANONYMOUS_BLOCK] = 510 ; + ObSysVars[510].base_value_ = "1" ; + ObSysVars[510].alias_ = "OB_SV_ENABLE_PARAMETER_ANONYMOUS_BLOCK" ; }(); [&] (){ - ObSysVars[510].default_value_ = "" ; - ObSysVars[510].info_ = "The directory where 
character sets are installed" ; - ObSysVars[510].name_ = "character_sets_dir" ; - ObSysVars[510].data_type_ = ObVarcharType ; - ObSysVars[510].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[510].id_ = SYS_VAR_CHARACTER_SETS_DIR ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_CHARACTER_SETS_DIR)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_CHARACTER_SETS_DIR] = 510 ; - ObSysVars[510].base_value_ = "" ; - ObSysVars[510].alias_ = "OB_SV_CHARACTER_SETS_DIR" ; - }(); - - [&] (){ - ObSysVars[511].default_value_ = "%Y-%m-%d" ; - ObSysVars[511].info_ = "" ; - ObSysVars[511].name_ = "date_format" ; + ObSysVars[511].default_value_ = "" ; + ObSysVars[511].info_ = "The directory where character sets are installed" ; + ObSysVars[511].name_ = "character_sets_dir" ; ObSysVars[511].data_type_ = ObVarcharType ; - ObSysVars[511].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[511].id_ = SYS_VAR_DATE_FORMAT ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_DATE_FORMAT)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_DATE_FORMAT] = 511 ; - ObSysVars[511].base_value_ = "%Y-%m-%d" ; - ObSysVars[511].alias_ = "OB_SV_DATE_FORMAT" ; + ObSysVars[511].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[511].id_ = SYS_VAR_CHARACTER_SETS_DIR ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_CHARACTER_SETS_DIR)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_CHARACTER_SETS_DIR] = 511 ; + ObSysVars[511].base_value_ = "" ; + ObSysVars[511].alias_ = "OB_SV_CHARACTER_SETS_DIR" ; }(); [&] (){ - ObSysVars[512].default_value_ = "%Y-%m-%d %H:%i:%s" ; + ObSysVars[512].default_value_ = "%Y-%m-%d" ; ObSysVars[512].info_ = "" ; - ObSysVars[512].name_ = "datetime_format" ; + ObSysVars[512].name_ = "date_format" ; ObSysVars[512].data_type_ = ObVarcharType ; ObSysVars[512].flags_ = ObSysVarFlag::GLOBAL_SCOPE | 
ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[512].id_ = SYS_VAR_DATETIME_FORMAT ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_DATETIME_FORMAT)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_DATETIME_FORMAT] = 512 ; - ObSysVars[512].base_value_ = "%Y-%m-%d %H:%i:%s" ; - ObSysVars[512].alias_ = "OB_SV_DATETIME_FORMAT" ; + ObSysVars[512].id_ = SYS_VAR_DATE_FORMAT ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_DATE_FORMAT)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_DATE_FORMAT] = 512 ; + ObSysVars[512].base_value_ = "%Y-%m-%d" ; + ObSysVars[512].alias_ = "OB_SV_DATE_FORMAT" ; }(); [&] (){ - ObSysVars[513].default_value_ = "1" ; - ObSysVars[513].info_ = "This variable controls how the server handles clients with expired passwords" ; - ObSysVars[513].name_ = "disconnect_on_expired_password" ; - ObSysVars[513].data_type_ = ObIntType ; + ObSysVars[513].default_value_ = "%Y-%m-%d %H:%i:%s" ; + ObSysVars[513].info_ = "" ; + ObSysVars[513].name_ = "datetime_format" ; + ObSysVars[513].data_type_ = ObVarcharType ; ObSysVars[513].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[513].id_ = SYS_VAR_DISCONNECT_ON_EXPIRED_PASSWORD ; + ObSysVars[513].id_ = SYS_VAR_DATETIME_FORMAT ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_DATETIME_FORMAT)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_DATETIME_FORMAT] = 513 ; + ObSysVars[513].base_value_ = "%Y-%m-%d %H:%i:%s" ; + ObSysVars[513].alias_ = "OB_SV_DATETIME_FORMAT" ; + }(); + + [&] (){ + ObSysVars[514].default_value_ = "1" ; + ObSysVars[514].info_ = "This variable controls how the server handles clients with expired passwords" ; + ObSysVars[514].name_ = "disconnect_on_expired_password" ; + ObSysVars[514].data_type_ = ObIntType ; + ObSysVars[514].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[514].id_ = 
SYS_VAR_DISCONNECT_ON_EXPIRED_PASSWORD ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_DISCONNECT_ON_EXPIRED_PASSWORD)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_DISCONNECT_ON_EXPIRED_PASSWORD] = 513 ; - ObSysVars[513].base_value_ = "1" ; - ObSysVars[513].alias_ = "OB_SV_DISCONNECT_ON_EXPIRED_PASSWORD" ; + ObSysVarsIdToArrayIdx[SYS_VAR_DISCONNECT_ON_EXPIRED_PASSWORD] = 514 ; + ObSysVars[514].base_value_ = "1" ; + ObSysVars[514].alias_ = "OB_SV_DISCONNECT_ON_EXPIRED_PASSWORD" ; }(); [&] (){ - ObSysVars[514].default_value_ = "" ; - ObSysVars[514].info_ = "The external user name used during the authentication process, as set by the plugin used to authenticate the client" ; - ObSysVars[514].name_ = "external_user" ; - ObSysVars[514].data_type_ = ObVarcharType ; - ObSysVars[514].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::NULLABLE | ObSysVarFlag::READONLY ; - ObSysVars[514].id_ = SYS_VAR_EXTERNAL_USER ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_EXTERNAL_USER)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_EXTERNAL_USER] = 514 ; - ObSysVars[514].base_value_ = "" ; - ObSysVars[514].alias_ = "OB_SV_EXTERNAL_USER" ; - }(); - - [&] (){ - ObSysVars[515].default_value_ = "YES" ; + ObSysVars[515].default_value_ = "" ; ObSysVars[515].info_ = "The external user name used during the authentication process, as set by the plugin used to authenticate the client" ; - ObSysVars[515].name_ = "have_crypt" ; + ObSysVars[515].name_ = "external_user" ; ObSysVars[515].data_type_ = ObVarcharType ; - ObSysVars[515].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[515].id_ = SYS_VAR_HAVE_CRYPT ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_HAVE_CRYPT)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_HAVE_CRYPT] = 515 ; - ObSysVars[515].base_value_ = "YES" ; - ObSysVars[515].alias_ = "OB_SV_HAVE_CRYPT" ; + ObSysVars[515].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | 
ObSysVarFlag::NULLABLE | ObSysVarFlag::READONLY ; + ObSysVars[515].id_ = SYS_VAR_EXTERNAL_USER ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_EXTERNAL_USER)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_EXTERNAL_USER] = 515 ; + ObSysVars[515].base_value_ = "" ; + ObSysVars[515].alias_ = "OB_SV_EXTERNAL_USER" ; }(); [&] (){ ObSysVars[516].default_value_ = "YES" ; - ObSysVars[516].info_ = "YES if mysqld supports dynamic loading of plugins, NO if not. If the value is NO, you cannot use options such as --plugin-load to load plugins at server startup, or the INSTALL PLUGIN statement to load plugins at runtime" ; - ObSysVars[516].name_ = "have_dynamic_loading" ; + ObSysVars[516].info_ = "The external user name used during the authentication process, as set by the plugin used to authenticate the client" ; + ObSysVars[516].name_ = "have_crypt" ; ObSysVars[516].data_type_ = ObVarcharType ; - ObSysVars[516].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[516].id_ = SYS_VAR_HAVE_DYNAMIC_LOADING ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_HAVE_DYNAMIC_LOADING)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_HAVE_DYNAMIC_LOADING] = 516 ; + ObSysVars[516].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[516].id_ = SYS_VAR_HAVE_CRYPT ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_HAVE_CRYPT)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_HAVE_CRYPT] = 516 ; ObSysVars[516].base_value_ = "YES" ; - ObSysVars[516].alias_ = "OB_SV_HAVE_DYNAMIC_LOADING" ; + ObSysVars[516].alias_ = "OB_SV_HAVE_CRYPT" ; }(); [&] (){ - ObSysVars[517].default_value_ = "" ; - ObSysVars[517].info_ = "The location of the configuration file for the keyring_aws plugin. 
This variable is unavailable unless that plugin is installed" ; - ObSysVars[517].name_ = "keyring_aws_conf_file" ; + ObSysVars[517].default_value_ = "YES" ; + ObSysVars[517].info_ = "YES if mysqld supports dynamic loading of plugins, NO if not. If the value is NO, you cannot use options such as --plugin-load to load plugins at server startup, or the INSTALL PLUGIN statement to load plugins at runtime" ; + ObSysVars[517].name_ = "have_dynamic_loading" ; ObSysVars[517].data_type_ = ObVarcharType ; ObSysVars[517].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[517].id_ = SYS_VAR_KEYRING_AWS_CONF_FILE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_AWS_CONF_FILE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_AWS_CONF_FILE] = 517 ; - ObSysVars[517].base_value_ = "" ; - ObSysVars[517].alias_ = "OB_SV_KEYRING_AWS_CONF_FILE" ; + ObSysVars[517].id_ = SYS_VAR_HAVE_DYNAMIC_LOADING ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_HAVE_DYNAMIC_LOADING)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_HAVE_DYNAMIC_LOADING] = 517 ; + ObSysVars[517].base_value_ = "YES" ; + ObSysVars[517].alias_ = "OB_SV_HAVE_DYNAMIC_LOADING" ; }(); [&] (){ ObSysVars[518].default_value_ = "" ; - ObSysVars[518].info_ = "The location of the storage file for the keyring_aws plugin. This variable is unavailable unless that plugin is installed" ; - ObSysVars[518].name_ = "keyring_aws_data_file" ; + ObSysVars[518].info_ = "The location of the configuration file for the keyring_aws plugin. 
This variable is unavailable unless that plugin is installed" ; + ObSysVars[518].name_ = "keyring_aws_conf_file" ; ObSysVars[518].data_type_ = ObVarcharType ; ObSysVars[518].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[518].id_ = SYS_VAR_KEYRING_AWS_DATA_FILE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_AWS_DATA_FILE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_AWS_DATA_FILE] = 518 ; + ObSysVars[518].id_ = SYS_VAR_KEYRING_AWS_CONF_FILE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_AWS_CONF_FILE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_AWS_CONF_FILE] = 518 ; ObSysVars[518].base_value_ = "" ; - ObSysVars[518].alias_ = "OB_SV_KEYRING_AWS_DATA_FILE" ; + ObSysVars[518].alias_ = "OB_SV_KEYRING_AWS_CONF_FILE" ; }(); [&] (){ ObSysVars[519].default_value_ = "" ; - ObSysVars[519].info_ = "The language to use for error messages" ; - ObSysVars[519].name_ = "language" ; + ObSysVars[519].info_ = "The location of the storage file for the keyring_aws plugin. 
This variable is unavailable unless that plugin is installed" ; + ObSysVars[519].name_ = "keyring_aws_data_file" ; ObSysVars[519].data_type_ = ObVarcharType ; ObSysVars[519].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[519].id_ = SYS_VAR_LANGUAGE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_LANGUAGE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_LANGUAGE] = 519 ; + ObSysVars[519].id_ = SYS_VAR_KEYRING_AWS_DATA_FILE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_AWS_DATA_FILE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_AWS_DATA_FILE] = 519 ; ObSysVars[519].base_value_ = "" ; - ObSysVars[519].alias_ = "OB_SV_LANGUAGE" ; + ObSysVars[519].alias_ = "OB_SV_KEYRING_AWS_DATA_FILE" ; }(); [&] (){ ObSysVars[520].default_value_ = "" ; ObSysVars[520].info_ = "The language to use for error messages" ; - ObSysVars[520].name_ = "lc_messages_dir" ; + ObSysVars[520].name_ = "language" ; ObSysVars[520].data_type_ = ObVarcharType ; ObSysVars[520].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[520].id_ = SYS_VAR_LC_MESSAGES_DIR ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_LC_MESSAGES_DIR)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_LC_MESSAGES_DIR] = 520 ; + ObSysVars[520].id_ = SYS_VAR_LANGUAGE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_LANGUAGE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_LANGUAGE] = 520 ; ObSysVars[520].base_value_ = "" ; - ObSysVars[520].alias_ = "OB_SV_LC_MESSAGES_DIR" ; + ObSysVars[520].alias_ = "OB_SV_LANGUAGE" ; }(); [&] (){ - ObSysVars[521].default_value_ = "0" ; - ObSysVars[521].info_ = "This variable describes the case sensitivity of file names on the file system where the data directory is located" ; - ObSysVars[521].name_ = "lower_case_file_system" ; - ObSysVars[521].data_type_ = ObIntType ; + ObSysVars[521].default_value_ = "" ; + ObSysVars[521].info_ = "The language to use for error messages" ; + 
ObSysVars[521].name_ = "lc_messages_dir" ; + ObSysVars[521].data_type_ = ObVarcharType ; ObSysVars[521].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[521].id_ = SYS_VAR_LOWER_CASE_FILE_SYSTEM ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_LOWER_CASE_FILE_SYSTEM)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_LOWER_CASE_FILE_SYSTEM] = 521 ; - ObSysVars[521].base_value_ = "0" ; - ObSysVars[521].alias_ = "OB_SV_LOWER_CASE_FILE_SYSTEM" ; + ObSysVars[521].id_ = SYS_VAR_LC_MESSAGES_DIR ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_LC_MESSAGES_DIR)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_LC_MESSAGES_DIR] = 521 ; + ObSysVars[521].base_value_ = "" ; + ObSysVars[521].alias_ = "OB_SV_LC_MESSAGES_DIR" ; }(); [&] (){ - ObSysVars[522].default_value_ = "1024" ; - ObSysVars[522].info_ = "The maximum number of bytes of memory reserved per session for computation of normalized statement digests" ; - ObSysVars[522].name_ = "max_digest_length" ; + ObSysVars[522].default_value_ = "0" ; + ObSysVars[522].info_ = "This variable describes the case sensitivity of file names on the file system where the data directory is located" ; + ObSysVars[522].name_ = "lower_case_file_system" ; ObSysVars[522].data_type_ = ObIntType ; ObSysVars[522].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[522].id_ = SYS_VAR_MAX_DIGEST_LENGTH ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_MAX_DIGEST_LENGTH)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_MAX_DIGEST_LENGTH] = 522 ; - ObSysVars[522].base_value_ = "1024" ; - ObSysVars[522].alias_ = "OB_SV_MAX_DIGEST_LENGTH" ; + ObSysVars[522].id_ = SYS_VAR_LOWER_CASE_FILE_SYSTEM ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_LOWER_CASE_FILE_SYSTEM)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_LOWER_CASE_FILE_SYSTEM] = 522 ; + ObSysVars[522].base_value_ = "0" ; + ObSysVars[522].alias_ = "OB_SV_LOWER_CASE_FILE_SYSTEM" ; }(); [&] (){ - 
ObSysVars[523].default_value_ = "ndbinfo" ; - ObSysVars[523].info_ = "Shows the name used for the NDB information database" ; - ObSysVars[523].name_ = "ndbinfo_database" ; - ObSysVars[523].data_type_ = ObVarcharType ; + ObSysVars[523].default_value_ = "1024" ; + ObSysVars[523].info_ = "The maximum number of bytes of memory reserved per session for computation of normalized statement digests" ; + ObSysVars[523].name_ = "max_digest_length" ; + ObSysVars[523].data_type_ = ObIntType ; ObSysVars[523].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[523].id_ = SYS_VAR_NDBINFO_DATABASE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDBINFO_DATABASE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDBINFO_DATABASE] = 523 ; - ObSysVars[523].base_value_ = "ndbinfo" ; - ObSysVars[523].alias_ = "OB_SV_NDBINFO_DATABASE" ; + ObSysVars[523].id_ = SYS_VAR_MAX_DIGEST_LENGTH ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_MAX_DIGEST_LENGTH)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_MAX_DIGEST_LENGTH] = 523 ; + ObSysVars[523].base_value_ = "1024" ; + ObSysVars[523].alias_ = "OB_SV_MAX_DIGEST_LENGTH" ; }(); [&] (){ - ObSysVars[524].default_value_ = "ndb$" ; - ObSysVars[524].info_ = "The prefix used in naming the ndbinfo database's base tables (normally hidden, unless exposed by setting ndbinfo_show_hidden" ; - ObSysVars[524].name_ = "ndbinfo_table_prefix" ; + ObSysVars[524].default_value_ = "ndbinfo" ; + ObSysVars[524].info_ = "Shows the name used for the NDB information database" ; + ObSysVars[524].name_ = "ndbinfo_database" ; ObSysVars[524].data_type_ = ObVarcharType ; ObSysVars[524].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[524].id_ = SYS_VAR_NDBINFO_TABLE_PREFIX ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDBINFO_TABLE_PREFIX)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDBINFO_TABLE_PREFIX] = 524 ; - ObSysVars[524].base_value_ = "ndb$" ; - 
ObSysVars[524].alias_ = "OB_SV_NDBINFO_TABLE_PREFIX" ; + ObSysVars[524].id_ = SYS_VAR_NDBINFO_DATABASE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDBINFO_DATABASE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDBINFO_DATABASE] = 524 ; + ObSysVars[524].base_value_ = "ndbinfo" ; + ObSysVars[524].alias_ = "OB_SV_NDBINFO_DATABASE" ; }(); [&] (){ - ObSysVars[525].default_value_ = "" ; - ObSysVars[525].info_ = "Shows the version of the ndbinfo engine in use" ; - ObSysVars[525].name_ = "ndbinfo_version" ; + ObSysVars[525].default_value_ = "ndb$" ; + ObSysVars[525].info_ = "The prefix used in naming the ndbinfo database's base tables (normally hidden, unless exposed by setting ndbinfo_show_hidden" ; + ObSysVars[525].name_ = "ndbinfo_table_prefix" ; ObSysVars[525].data_type_ = ObVarcharType ; ObSysVars[525].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[525].id_ = SYS_VAR_NDBINFO_VERSION ; + ObSysVars[525].id_ = SYS_VAR_NDBINFO_TABLE_PREFIX ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDBINFO_TABLE_PREFIX)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDBINFO_TABLE_PREFIX] = 525 ; + ObSysVars[525].base_value_ = "ndb$" ; + ObSysVars[525].alias_ = "OB_SV_NDBINFO_TABLE_PREFIX" ; + }(); + + [&] (){ + ObSysVars[526].default_value_ = "" ; + ObSysVars[526].info_ = "Shows the version of the ndbinfo engine in use" ; + ObSysVars[526].name_ = "ndbinfo_version" ; + ObSysVars[526].data_type_ = ObVarcharType ; + ObSysVars[526].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[526].id_ = SYS_VAR_NDBINFO_VERSION ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDBINFO_VERSION)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDBINFO_VERSION] = 525 ; - ObSysVars[525].base_value_ = "" ; - ObSysVars[525].alias_ = "OB_SV_NDBINFO_VERSION" ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDBINFO_VERSION] = 526 ; + ObSysVars[526].base_value_ = "" ; + ObSysVars[526].alias_ = "OB_SV_NDBINFO_VERSION" 
; }(); [&] (){ - ObSysVars[526].default_value_ = "32768" ; - ObSysVars[526].info_ = "This sets the size in bytes that is used for NDB transaction batches" ; - ObSysVars[526].name_ = "ndb_batch_size" ; - ObSysVars[526].data_type_ = ObIntType ; - ObSysVars[526].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[526].id_ = SYS_VAR_NDB_BATCH_SIZE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_BATCH_SIZE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_BATCH_SIZE] = 526 ; - ObSysVars[526].base_value_ = "32768" ; - ObSysVars[526].alias_ = "OB_SV_NDB_BATCH_SIZE" ; - }(); - - [&] (){ - ObSysVars[527].default_value_ = "1" ; - ObSysVars[527].info_ = "a mysqld process can use multiple connections to the cluster, effectively mimicking several SQL nodes" ; - ObSysVars[527].name_ = "ndb_cluster_connection_pool" ; + ObSysVars[527].default_value_ = "32768" ; + ObSysVars[527].info_ = "This sets the size in bytes that is used for NDB transaction batches" ; + ObSysVars[527].name_ = "ndb_batch_size" ; ObSysVars[527].data_type_ = ObIntType ; - ObSysVars[527].min_val_ = "1" ; - ObSysVars[527].max_val_ = "63" ; - ObSysVars[527].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[527].id_ = SYS_VAR_NDB_CLUSTER_CONNECTION_POOL ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_CLUSTER_CONNECTION_POOL)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_CLUSTER_CONNECTION_POOL] = 527 ; - ObSysVars[527].base_value_ = "1" ; - ObSysVars[527].alias_ = "OB_SV_NDB_CLUSTER_CONNECTION_POOL" ; + ObSysVars[527].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[527].id_ = SYS_VAR_NDB_BATCH_SIZE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_BATCH_SIZE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_BATCH_SIZE] = 527 ; + ObSysVars[527].base_value_ = "32768" ; + 
ObSysVars[527].alias_ = "OB_SV_NDB_BATCH_SIZE" ; }(); [&] (){ - ObSysVars[528].default_value_ = "" ; - ObSysVars[528].info_ = "Specifies a comma-separated list of node IDs for connections to the cluster used by an SQL node" ; - ObSysVars[528].name_ = "ndb_cluster_connection_pool_nodeids" ; - ObSysVars[528].data_type_ = ObVarcharType ; + ObSysVars[528].default_value_ = "1" ; + ObSysVars[528].info_ = "a mysqld process can use multiple connections to the cluster, effectively mimicking several SQL nodes" ; + ObSysVars[528].name_ = "ndb_cluster_connection_pool" ; + ObSysVars[528].data_type_ = ObIntType ; + ObSysVars[528].min_val_ = "1" ; + ObSysVars[528].max_val_ = "63" ; ObSysVars[528].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[528].id_ = SYS_VAR_NDB_CLUSTER_CONNECTION_POOL_NODEIDS ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_CLUSTER_CONNECTION_POOL_NODEIDS)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_CLUSTER_CONNECTION_POOL_NODEIDS] = 528 ; - ObSysVars[528].base_value_ = "" ; - ObSysVars[528].alias_ = "OB_SV_NDB_CLUSTER_CONNECTION_POOL_NODEIDS" ; + ObSysVars[528].id_ = SYS_VAR_NDB_CLUSTER_CONNECTION_POOL ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_CLUSTER_CONNECTION_POOL)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_CLUSTER_CONNECTION_POOL] = 528 ; + ObSysVars[528].base_value_ = "1" ; + ObSysVars[528].alias_ = "OB_SV_NDB_CLUSTER_CONNECTION_POOL" ; }(); [&] (){ - ObSysVars[529].default_value_ = "0" ; - ObSysVars[529].info_ = "Causes a replica mysqld to log any updates received from its immediate source to the mysql.ndb_apply_status table in its own binary log using its own server ID rather than the server ID of the source" ; - ObSysVars[529].name_ = "ndb_log_apply_status" ; - ObSysVars[529].data_type_ = ObIntType ; + ObSysVars[529].default_value_ = "" ; + ObSysVars[529].info_ = "Specifies a comma-separated list of node IDs for connections to the cluster used by an SQL node" ; + 
ObSysVars[529].name_ = "ndb_cluster_connection_pool_nodeids" ; + ObSysVars[529].data_type_ = ObVarcharType ; ObSysVars[529].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[529].id_ = SYS_VAR_NDB_LOG_APPLY_STATUS ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_LOG_APPLY_STATUS)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_LOG_APPLY_STATUS] = 529 ; - ObSysVars[529].base_value_ = "0" ; - ObSysVars[529].alias_ = "OB_SV_NDB_LOG_APPLY_STATUS" ; + ObSysVars[529].id_ = SYS_VAR_NDB_CLUSTER_CONNECTION_POOL_NODEIDS ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_CLUSTER_CONNECTION_POOL_NODEIDS)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_CLUSTER_CONNECTION_POOL_NODEIDS] = 529 ; + ObSysVars[529].base_value_ = "" ; + ObSysVars[529].alias_ = "OB_SV_NDB_CLUSTER_CONNECTION_POOL_NODEIDS" ; }(); [&] (){ - ObSysVars[530].default_value_ = "1" ; - ObSysVars[530].info_ = "Causes updates to NDB tables to be written to the binary log. 
Setting this variable has no effect if binary logging is not already enabled for the server using log_bin" ; - ObSysVars[530].name_ = "ndb_log_bin" ; + ObSysVars[530].default_value_ = "0" ; + ObSysVars[530].info_ = "Causes a replica mysqld to log any updates received from its immediate source to the mysql.ndb_apply_status table in its own binary log using its own server ID rather than the server ID of the source" ; + ObSysVars[530].name_ = "ndb_log_apply_status" ; ObSysVars[530].data_type_ = ObIntType ; - ObSysVars[530].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[530].id_ = SYS_VAR_NDB_LOG_BIN ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_LOG_BIN)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_LOG_BIN] = 530 ; - ObSysVars[530].base_value_ = "1" ; - ObSysVars[530].alias_ = "OB_SV_NDB_LOG_BIN" ; + ObSysVars[530].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[530].id_ = SYS_VAR_NDB_LOG_APPLY_STATUS ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_LOG_APPLY_STATUS)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_LOG_APPLY_STATUS] = 530 ; + ObSysVars[530].base_value_ = "0" ; + ObSysVars[530].alias_ = "OB_SV_NDB_LOG_APPLY_STATUS" ; }(); [&] (){ - ObSysVars[531].default_value_ = "0" ; - ObSysVars[531].info_ = "When this option is specified, and complete logging of all found row events is not possible, the mysqld process is terminated" ; - ObSysVars[531].name_ = "ndb_log_fail_terminate" ; + ObSysVars[531].default_value_ = "1" ; + ObSysVars[531].info_ = "Causes updates to NDB tables to be written to the binary log. 
Setting this variable has no effect if binary logging is not already enabled for the server using log_bin" ; + ObSysVars[531].name_ = "ndb_log_bin" ; ObSysVars[531].data_type_ = ObIntType ; - ObSysVars[531].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[531].id_ = SYS_VAR_NDB_LOG_FAIL_TERMINATE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_LOG_FAIL_TERMINATE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_LOG_FAIL_TERMINATE] = 531 ; - ObSysVars[531].base_value_ = "0" ; - ObSysVars[531].alias_ = "OB_SV_NDB_LOG_FAIL_TERMINATE" ; + ObSysVars[531].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[531].id_ = SYS_VAR_NDB_LOG_BIN ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_LOG_BIN)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_LOG_BIN] = 531 ; + ObSysVars[531].base_value_ = "1" ; + ObSysVars[531].alias_ = "OB_SV_NDB_LOG_BIN" ; }(); [&] (){ ObSysVars[532].default_value_ = "0" ; - ObSysVars[532].info_ = "Shows whether the originating server ID and epoch are logged in the ndb_binlog_index table" ; - ObSysVars[532].name_ = "ndb_log_orig" ; + ObSysVars[532].info_ = "When this option is specified, and complete logging of all found row events is not possible, the mysqld process is terminated" ; + ObSysVars[532].name_ = "ndb_log_fail_terminate" ; ObSysVars[532].data_type_ = ObIntType ; ObSysVars[532].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[532].id_ = SYS_VAR_NDB_LOG_ORIG ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_LOG_ORIG)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_LOG_ORIG] = 532 ; + ObSysVars[532].id_ = SYS_VAR_NDB_LOG_FAIL_TERMINATE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_LOG_FAIL_TERMINATE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_LOG_FAIL_TERMINATE] = 532 ; ObSysVars[532].base_value_ = "0" ; - ObSysVars[532].alias_ 
= "OB_SV_NDB_LOG_ORIG" ; + ObSysVars[532].alias_ = "OB_SV_NDB_LOG_FAIL_TERMINATE" ; }(); [&] (){ ObSysVars[533].default_value_ = "0" ; - ObSysVars[533].info_ = "shows whether a replica mysqld writes NDB transaction IDs in the binary log" ; - ObSysVars[533].name_ = "ndb_log_transaction_id" ; + ObSysVars[533].info_ = "Shows whether the originating server ID and epoch are logged in the ndb_binlog_index table" ; + ObSysVars[533].name_ = "ndb_log_orig" ; ObSysVars[533].data_type_ = ObIntType ; ObSysVars[533].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[533].id_ = SYS_VAR_NDB_LOG_TRANSACTION_ID ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_LOG_TRANSACTION_ID)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_LOG_TRANSACTION_ID] = 533 ; + ObSysVars[533].id_ = SYS_VAR_NDB_LOG_ORIG ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_LOG_ORIG)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_LOG_ORIG] = 533 ; ObSysVars[533].base_value_ = "0" ; - ObSysVars[533].alias_ = "OB_SV_NDB_LOG_TRANSACTION_ID" ; + ObSysVars[533].alias_ = "OB_SV_NDB_LOG_ORIG" ; }(); [&] (){ - ObSysVars[534].default_value_ = "3" ; - ObSysVars[534].info_ = "" ; - ObSysVars[534].name_ = "ndb_optimized_node_selection" ; + ObSysVars[534].default_value_ = "0" ; + ObSysVars[534].info_ = "shows whether a replica mysqld writes NDB transaction IDs in the binary log" ; + ObSysVars[534].name_ = "ndb_log_transaction_id" ; ObSysVars[534].data_type_ = ObIntType ; ObSysVars[534].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[534].id_ = SYS_VAR_NDB_OPTIMIZED_NODE_SELECTION ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_OPTIMIZED_NODE_SELECTION)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_OPTIMIZED_NODE_SELECTION] = 534 ; - ObSysVars[534].base_value_ = "3" ; - ObSysVars[534].alias_ = "OB_SV_NDB_OPTIMIZED_NODE_SELECTION" ; + ObSysVars[534].id_ = SYS_VAR_NDB_LOG_TRANSACTION_ID ; + 
cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_LOG_TRANSACTION_ID)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_LOG_TRANSACTION_ID] = 534 ; + ObSysVars[534].base_value_ = "0" ; + ObSysVars[534].alias_ = "OB_SV_NDB_LOG_TRANSACTION_ID" ; }(); [&] (){ - ObSysVars[535].default_value_ = "" ; - ObSysVars[535].info_ = "If this MySQL Server is connected to an NDB cluster, this read-only variable shows the cluster system name. Otherwise, the value is an empty string" ; - ObSysVars[535].name_ = "Ndb_system_name" ; - ObSysVars[535].data_type_ = ObVarcharType ; + ObSysVars[535].default_value_ = "3" ; + ObSysVars[535].info_ = "" ; + ObSysVars[535].name_ = "ndb_optimized_node_selection" ; + ObSysVars[535].data_type_ = ObIntType ; ObSysVars[535].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[535].id_ = SYS_VAR_NDB_SYSTEM_NAME ; + ObSysVars[535].id_ = SYS_VAR_NDB_OPTIMIZED_NODE_SELECTION ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_OPTIMIZED_NODE_SELECTION)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_OPTIMIZED_NODE_SELECTION] = 535 ; + ObSysVars[535].base_value_ = "3" ; + ObSysVars[535].alias_ = "OB_SV_NDB_OPTIMIZED_NODE_SELECTION" ; + }(); + + [&] (){ + ObSysVars[536].default_value_ = "" ; + ObSysVars[536].info_ = "If this MySQL Server is connected to an NDB cluster, this read-only variable shows the cluster system name. 
Otherwise, the value is an empty string" ; + ObSysVars[536].name_ = "Ndb_system_name" ; + ObSysVars[536].data_type_ = ObVarcharType ; + ObSysVars[536].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[536].id_ = SYS_VAR_NDB_SYSTEM_NAME ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_SYSTEM_NAME)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_SYSTEM_NAME] = 535 ; - ObSysVars[535].base_value_ = "" ; - ObSysVars[535].alias_ = "OB_SV_NDB_SYSTEM_NAME" ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_SYSTEM_NAME] = 536 ; + ObSysVars[536].base_value_ = "" ; + ObSysVars[536].alias_ = "OB_SV_NDB_SYSTEM_NAME" ; }(); [&] (){ - ObSysVars[536].default_value_ = "0" ; - ObSysVars[536].info_ = "Forces NDB to use copying of tables in the event of problems with online ALTER TABLE operations" ; - ObSysVars[536].name_ = "ndb_use_copying_alter_table" ; - ObSysVars[536].data_type_ = ObIntType ; - ObSysVars[536].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[536].id_ = SYS_VAR_NDB_USE_COPYING_ALTER_TABLE ; + ObSysVars[537].default_value_ = "0" ; + ObSysVars[537].info_ = "Forces NDB to use copying of tables in the event of problems with online ALTER TABLE operations" ; + ObSysVars[537].name_ = "ndb_use_copying_alter_table" ; + ObSysVars[537].data_type_ = ObIntType ; + ObSysVars[537].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[537].id_ = SYS_VAR_NDB_USE_COPYING_ALTER_TABLE ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_USE_COPYING_ALTER_TABLE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_USE_COPYING_ALTER_TABLE] = 536 ; - ObSysVars[536].base_value_ = "0" ; - ObSysVars[536].alias_ = "OB_SV_NDB_USE_COPYING_ALTER_TABLE" ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_USE_COPYING_ALTER_TABLE] = 537 ; + ObSysVars[537].base_value_ = "0" ; + ObSysVars[537].alias_ = 
"OB_SV_NDB_USE_COPYING_ALTER_TABLE" ; }(); [&] (){ - ObSysVars[537].default_value_ = "" ; - ObSysVars[537].info_ = "NDB engine version in ndb-x.y.z format" ; - ObSysVars[537].name_ = "ndb_version_string" ; - ObSysVars[537].data_type_ = ObVarcharType ; - ObSysVars[537].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[537].id_ = SYS_VAR_NDB_VERSION_STRING ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_VERSION_STRING)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_VERSION_STRING] = 537 ; - ObSysVars[537].base_value_ = "" ; - ObSysVars[537].alias_ = "OB_SV_NDB_VERSION_STRING" ; - }(); - - [&] (){ - ObSysVars[538].default_value_ = "30" ; - ObSysVars[538].info_ = "This option sets the period of time that the MySQL server waits for connections to NDB Cluster management and data nodes to be established before accepting MySQL client connections." ; - ObSysVars[538].name_ = "ndb_wait_connected" ; - ObSysVars[538].data_type_ = ObIntType ; - ObSysVars[538].min_val_ = "0" ; - ObSysVars[538].max_val_ = "31536000" ; + ObSysVars[538].default_value_ = "" ; + ObSysVars[538].info_ = "NDB engine version in ndb-x.y.z format" ; + ObSysVars[538].name_ = "ndb_version_string" ; + ObSysVars[538].data_type_ = ObVarcharType ; ObSysVars[538].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[538].id_ = SYS_VAR_NDB_WAIT_CONNECTED ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_WAIT_CONNECTED)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_WAIT_CONNECTED] = 538 ; - ObSysVars[538].base_value_ = "30" ; - ObSysVars[538].alias_ = "OB_SV_NDB_WAIT_CONNECTED" ; + ObSysVars[538].id_ = SYS_VAR_NDB_VERSION_STRING ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_VERSION_STRING)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_VERSION_STRING] = 538 ; + ObSysVars[538].base_value_ = "" ; + ObSysVars[538].alias_ = "OB_SV_NDB_VERSION_STRING" ; }(); [&] (){ 
ObSysVars[539].default_value_ = "30" ; - ObSysVars[539].info_ = "This variable shows the period of time that the MySQL server waits for the NDB storage engine to complete setup before timing out and treating NDB as unavailable. The time is specified in seconds." ; - ObSysVars[539].name_ = "ndb_wait_setup" ; + ObSysVars[539].info_ = "This option sets the period of time that the MySQL server waits for connections to NDB Cluster management and data nodes to be established before accepting MySQL client connections." ; + ObSysVars[539].name_ = "ndb_wait_connected" ; ObSysVars[539].data_type_ = ObIntType ; ObSysVars[539].min_val_ = "0" ; ObSysVars[539].max_val_ = "31536000" ; ObSysVars[539].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[539].id_ = SYS_VAR_NDB_WAIT_SETUP ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_WAIT_SETUP)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_WAIT_SETUP] = 539 ; + ObSysVars[539].id_ = SYS_VAR_NDB_WAIT_CONNECTED ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_WAIT_CONNECTED)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_WAIT_CONNECTED] = 539 ; ObSysVars[539].base_value_ = "30" ; - ObSysVars[539].alias_ = "OB_SV_NDB_WAIT_SETUP" ; + ObSysVars[539].alias_ = "OB_SV_NDB_WAIT_CONNECTED" ; }(); [&] (){ - ObSysVars[540].default_value_ = "" ; - ObSysVars[540].info_ = "If the current client is a proxy for another user, this variable is the proxy user account name" ; - ObSysVars[540].name_ = "proxy_user" ; - ObSysVars[540].data_type_ = ObVarcharType ; - ObSysVars[540].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[540].id_ = SYS_VAR_PROXY_USER ; + ObSysVars[540].default_value_ = "30" ; + ObSysVars[540].info_ = "This variable shows the period of time that the MySQL server waits for the NDB storage engine to complete setup before timing out and treating NDB as unavailable. The time is specified in seconds." 
; + ObSysVars[540].name_ = "ndb_wait_setup" ; + ObSysVars[540].data_type_ = ObIntType ; + ObSysVars[540].min_val_ = "0" ; + ObSysVars[540].max_val_ = "31536000" ; + ObSysVars[540].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[540].id_ = SYS_VAR_NDB_WAIT_SETUP ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_WAIT_SETUP)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_WAIT_SETUP] = 540 ; + ObSysVars[540].base_value_ = "30" ; + ObSysVars[540].alias_ = "OB_SV_NDB_WAIT_SETUP" ; + }(); + + [&] (){ + ObSysVars[541].default_value_ = "" ; + ObSysVars[541].info_ = "If the current client is a proxy for another user, this variable is the proxy user account name" ; + ObSysVars[541].name_ = "proxy_user" ; + ObSysVars[541].data_type_ = ObVarcharType ; + ObSysVars[541].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[541].id_ = SYS_VAR_PROXY_USER ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_PROXY_USER)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_PROXY_USER] = 540 ; - ObSysVars[540].base_value_ = "" ; - ObSysVars[540].alias_ = "OB_SV_PROXY_USER" ; + ObSysVarsIdToArrayIdx[SYS_VAR_PROXY_USER] = 541 ; + ObSysVars[541].base_value_ = "" ; + ObSysVars[541].alias_ = "OB_SV_PROXY_USER" ; }(); [&] (){ - ObSysVars[541].default_value_ = "1" ; - ObSysVars[541].info_ = "It controls whether the server autogenerates RSA private/public key-pair files in the data directory" ; - ObSysVars[541].name_ = "sha256_password_auto_generate_rsa_keys" ; - ObSysVars[541].data_type_ = ObIntType ; - ObSysVars[541].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[541].id_ = SYS_VAR_SHA256_PASSWORD_AUTO_GENERATE_RSA_KEYS ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SHA256_PASSWORD_AUTO_GENERATE_RSA_KEYS)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_SHA256_PASSWORD_AUTO_GENERATE_RSA_KEYS] = 541 ; - ObSysVars[541].base_value_ = "1" ; - 
ObSysVars[541].alias_ = "OB_SV_SHA256_PASSWORD_AUTO_GENERATE_RSA_KEYS" ; - }(); - - [&] (){ - ObSysVars[542].default_value_ = "private_key.pem" ; - ObSysVars[542].info_ = "Its value is the path name of the RSA private key file for the sha256_password authentication plugin" ; - ObSysVars[542].name_ = "sha256_password_private_key_path" ; - ObSysVars[542].data_type_ = ObVarcharType ; + ObSysVars[542].default_value_ = "1" ; + ObSysVars[542].info_ = "It controls whether the server autogenerates RSA private/public key-pair files in the data directory" ; + ObSysVars[542].name_ = "sha256_password_auto_generate_rsa_keys" ; + ObSysVars[542].data_type_ = ObIntType ; ObSysVars[542].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[542].id_ = SYS_VAR_SHA256_PASSWORD_PRIVATE_KEY_PATH ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SHA256_PASSWORD_PRIVATE_KEY_PATH)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_SHA256_PASSWORD_PRIVATE_KEY_PATH] = 542 ; - ObSysVars[542].base_value_ = "private_key.pem" ; - ObSysVars[542].alias_ = "OB_SV_SHA256_PASSWORD_PRIVATE_KEY_PATH" ; + ObSysVars[542].id_ = SYS_VAR_SHA256_PASSWORD_AUTO_GENERATE_RSA_KEYS ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SHA256_PASSWORD_AUTO_GENERATE_RSA_KEYS)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_SHA256_PASSWORD_AUTO_GENERATE_RSA_KEYS] = 542 ; + ObSysVars[542].base_value_ = "1" ; + ObSysVars[542].alias_ = "OB_SV_SHA256_PASSWORD_AUTO_GENERATE_RSA_KEYS" ; }(); [&] (){ - ObSysVars[543].default_value_ = "public_key.pem" ; - ObSysVars[543].info_ = "Its value is the path name of the RSA public key file for the sha256_password authentication plugin" ; - ObSysVars[543].name_ = "sha256_password_public_key_path" ; + ObSysVars[543].default_value_ = "private_key.pem" ; + ObSysVars[543].info_ = "Its value is the path name of the RSA private key file for the sha256_password authentication plugin" ; + ObSysVars[543].name_ = "sha256_password_private_key_path" ; 
ObSysVars[543].data_type_ = ObVarcharType ; ObSysVars[543].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[543].id_ = SYS_VAR_SHA256_PASSWORD_PUBLIC_KEY_PATH ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SHA256_PASSWORD_PUBLIC_KEY_PATH)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_SHA256_PASSWORD_PUBLIC_KEY_PATH] = 543 ; - ObSysVars[543].base_value_ = "public_key.pem" ; - ObSysVars[543].alias_ = "OB_SV_SHA256_PASSWORD_PUBLIC_KEY_PATH" ; + ObSysVars[543].id_ = SYS_VAR_SHA256_PASSWORD_PRIVATE_KEY_PATH ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SHA256_PASSWORD_PRIVATE_KEY_PATH)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_SHA256_PASSWORD_PRIVATE_KEY_PATH] = 543 ; + ObSysVars[543].base_value_ = "private_key.pem" ; + ObSysVars[543].alias_ = "OB_SV_SHA256_PASSWORD_PRIVATE_KEY_PATH" ; }(); [&] (){ - ObSysVars[544].default_value_ = "0" ; - ObSysVars[544].info_ = " If the variable value is ON, the SHOW DATABASES statement is permitted only to users who have the SHOW DATABASES privilege, and the statement displays all database names" ; - ObSysVars[544].name_ = "skip_show_database" ; + ObSysVars[544].default_value_ = "public_key.pem" ; + ObSysVars[544].info_ = "Its value is the path name of the RSA public key file for the sha256_password authentication plugin" ; + ObSysVars[544].name_ = "sha256_password_public_key_path" ; ObSysVars[544].data_type_ = ObVarcharType ; ObSysVars[544].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[544].id_ = SYS_VAR_SKIP_SHOW_DATABASE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SKIP_SHOW_DATABASE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_SKIP_SHOW_DATABASE] = 544 ; - ObSysVars[544].base_value_ = "0" ; - ObSysVars[544].alias_ = "OB_SV_SKIP_SHOW_DATABASE" ; + ObSysVars[544].id_ = SYS_VAR_SHA256_PASSWORD_PUBLIC_KEY_PATH ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SHA256_PASSWORD_PUBLIC_KEY_PATH)) ; + 
ObSysVarsIdToArrayIdx[SYS_VAR_SHA256_PASSWORD_PUBLIC_KEY_PATH] = 544 ; + ObSysVars[544].base_value_ = "public_key.pem" ; + ObSysVars[544].alias_ = "OB_SV_SHA256_PASSWORD_PUBLIC_KEY_PATH" ; }(); [&] (){ - ObSysVars[545].default_value_ = "" ; - ObSysVars[545].info_ = "This option tells the server to load the named plugins at startup" ; - ObSysVars[545].name_ = "plugin_load" ; + ObSysVars[545].default_value_ = "0" ; + ObSysVars[545].info_ = " If the variable value is ON, the SHOW DATABASES statement is permitted only to users who have the SHOW DATABASES privilege, and the statement displays all database names" ; + ObSysVars[545].name_ = "skip_show_database" ; ObSysVars[545].data_type_ = ObVarcharType ; - ObSysVars[545].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[545].id_ = SYS_VAR_PLUGIN_LOAD ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_PLUGIN_LOAD)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_PLUGIN_LOAD] = 545 ; - ObSysVars[545].base_value_ = "" ; - ObSysVars[545].alias_ = "OB_SV_PLUGIN_LOAD" ; + ObSysVars[545].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[545].id_ = SYS_VAR_SKIP_SHOW_DATABASE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SKIP_SHOW_DATABASE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_SKIP_SHOW_DATABASE] = 545 ; + ObSysVars[545].base_value_ = "0" ; + ObSysVars[545].alias_ = "OB_SV_SKIP_SHOW_DATABASE" ; }(); [&] (){ ObSysVars[546].default_value_ = "" ; - ObSysVars[546].info_ = "adds a plugin or plugins to the set of plugins to be loaded at startup" ; - ObSysVars[546].name_ = "plugin_load_add" ; + ObSysVars[546].info_ = "This option tells the server to load the named plugins at startup" ; + ObSysVars[546].name_ = "plugin_load" ; ObSysVars[546].data_type_ = ObVarcharType ; ObSysVars[546].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[546].id_ = SYS_VAR_PLUGIN_LOAD_ADD ; - 
cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_PLUGIN_LOAD_ADD)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_PLUGIN_LOAD_ADD] = 546 ; + ObSysVars[546].id_ = SYS_VAR_PLUGIN_LOAD ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_PLUGIN_LOAD)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_PLUGIN_LOAD] = 546 ; ObSysVars[546].base_value_ = "" ; - ObSysVars[546].alias_ = "OB_SV_PLUGIN_LOAD_ADD" ; + ObSysVars[546].alias_ = "OB_SV_PLUGIN_LOAD" ; }(); [&] (){ - ObSysVars[547].default_value_ = "0" ; - ObSysVars[547].info_ = "the server stores all temporary tables on disk rather than in memory" ; - ObSysVars[547].name_ = "big_tables" ; - ObSysVars[547].data_type_ = ObIntType ; - ObSysVars[547].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[547].id_ = SYS_VAR_BIG_TABLES ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_BIG_TABLES)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_BIG_TABLES] = 547 ; - ObSysVars[547].base_value_ = "0" ; - ObSysVars[547].alias_ = "OB_SV_BIG_TABLES" ; + ObSysVars[547].default_value_ = "" ; + ObSysVars[547].info_ = "adds a plugin or plugins to the set of plugins to be loaded at startup" ; + ObSysVars[547].name_ = "plugin_load_add" ; + ObSysVars[547].data_type_ = ObVarcharType ; + ObSysVars[547].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[547].id_ = SYS_VAR_PLUGIN_LOAD_ADD ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_PLUGIN_LOAD_ADD)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_PLUGIN_LOAD_ADD] = 547 ; + ObSysVars[547].base_value_ = "" ; + ObSysVars[547].alias_ = "OB_SV_PLUGIN_LOAD_ADD" ; }(); [&] (){ ObSysVars[548].default_value_ = "0" ; - ObSysVars[548].info_ = "If the check_proxy_users system variable is enabled, the server performs proxy user mapping for any authentication plugins that make such a request" ; - ObSysVars[548].name_ = "check_proxy_users" ; + ObSysVars[548].info_ = "the server stores all temporary tables on disk 
rather than in memory" ; + ObSysVars[548].name_ = "big_tables" ; ObSysVars[548].data_type_ = ObIntType ; - ObSysVars[548].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[548].id_ = SYS_VAR_CHECK_PROXY_USERS ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_CHECK_PROXY_USERS)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_CHECK_PROXY_USERS] = 548 ; + ObSysVars[548].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[548].id_ = SYS_VAR_BIG_TABLES ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_BIG_TABLES)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_BIG_TABLES] = 548 ; ObSysVars[548].base_value_ = "0" ; - ObSysVars[548].alias_ = "OB_SV_CHECK_PROXY_USERS" ; + ObSysVars[548].alias_ = "OB_SV_BIG_TABLES" ; }(); [&] (){ ObSysVars[549].default_value_ = "0" ; - ObSysVars[549].info_ = "The number of consecutive failed connection attempts permitted to accounts before the server adds a delay for subsequent connection attempts" ; - ObSysVars[549].name_ = "connection_control_failed_connections_threshold" ; + ObSysVars[549].info_ = "If the check_proxy_users system variable is enabled, the server performs proxy user mapping for any authentication plugins that make such a request" ; + ObSysVars[549].name_ = "check_proxy_users" ; ObSysVars[549].data_type_ = ObIntType ; - ObSysVars[549].min_val_ = "0" ; - ObSysVars[549].max_val_ = "2147483647" ; ObSysVars[549].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[549].id_ = SYS_VAR_CONNECTION_CONTROL_FAILED_CONNECTIONS_THRESHOLD ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_CONNECTION_CONTROL_FAILED_CONNECTIONS_THRESHOLD)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_CONNECTION_CONTROL_FAILED_CONNECTIONS_THRESHOLD] = 549 ; + ObSysVars[549].id_ = SYS_VAR_CHECK_PROXY_USERS ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_CHECK_PROXY_USERS)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_CHECK_PROXY_USERS] = 549 ; 
ObSysVars[549].base_value_ = "0" ; - ObSysVars[549].alias_ = "OB_SV_CONNECTION_CONTROL_FAILED_CONNECTIONS_THRESHOLD" ; + ObSysVars[549].alias_ = "OB_SV_CHECK_PROXY_USERS" ; }(); [&] (){ - ObSysVars[550].default_value_ = "2147483647" ; - ObSysVars[550].info_ = "The maximum delay in milliseconds for server response to failed connection attempts, if connection_control_failed_connections_threshold is greater than zero" ; - ObSysVars[550].name_ = "connection_control_max_connection_delay" ; + ObSysVars[550].default_value_ = "0" ; + ObSysVars[550].info_ = "The number of consecutive failed connection attempts permitted to accounts before the server adds a delay for subsequent connection attempts" ; + ObSysVars[550].name_ = "connection_control_failed_connections_threshold" ; ObSysVars[550].data_type_ = ObIntType ; - ObSysVars[550].min_val_ = "1000" ; + ObSysVars[550].min_val_ = "0" ; ObSysVars[550].max_val_ = "2147483647" ; ObSysVars[550].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[550].id_ = SYS_VAR_CONNECTION_CONTROL_MAX_CONNECTION_DELAY ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_CONNECTION_CONTROL_MAX_CONNECTION_DELAY)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_CONNECTION_CONTROL_MAX_CONNECTION_DELAY] = 550 ; - ObSysVars[550].base_value_ = "2147483647" ; - ObSysVars[550].alias_ = "OB_SV_CONNECTION_CONTROL_MAX_CONNECTION_DELAY" ; + ObSysVars[550].id_ = SYS_VAR_CONNECTION_CONTROL_FAILED_CONNECTIONS_THRESHOLD ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_CONNECTION_CONTROL_FAILED_CONNECTIONS_THRESHOLD)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_CONNECTION_CONTROL_FAILED_CONNECTIONS_THRESHOLD] = 550 ; + ObSysVars[550].base_value_ = "0" ; + ObSysVars[550].alias_ = "OB_SV_CONNECTION_CONTROL_FAILED_CONNECTIONS_THRESHOLD" ; }(); [&] (){ - ObSysVars[551].default_value_ = "1000" ; - ObSysVars[551].info_ = "The minmum delay in milliseconds for server response to failed connection attempts, if 
connection_control_failed_connections_threshold is greater than zero" ; - ObSysVars[551].name_ = "connection_control_min_connection_delay" ; + ObSysVars[551].default_value_ = "2147483647" ; + ObSysVars[551].info_ = "The maximum delay in milliseconds for server response to failed connection attempts, if connection_control_failed_connections_threshold is greater than zero" ; + ObSysVars[551].name_ = "connection_control_max_connection_delay" ; ObSysVars[551].data_type_ = ObIntType ; ObSysVars[551].min_val_ = "1000" ; ObSysVars[551].max_val_ = "2147483647" ; ObSysVars[551].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[551].id_ = SYS_VAR_CONNECTION_CONTROL_MIN_CONNECTION_DELAY ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_CONNECTION_CONTROL_MIN_CONNECTION_DELAY)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_CONNECTION_CONTROL_MIN_CONNECTION_DELAY] = 551 ; - ObSysVars[551].base_value_ = "1000" ; - ObSysVars[551].alias_ = "OB_SV_CONNECTION_CONTROL_MIN_CONNECTION_DELAY" ; + ObSysVars[551].id_ = SYS_VAR_CONNECTION_CONTROL_MAX_CONNECTION_DELAY ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_CONNECTION_CONTROL_MAX_CONNECTION_DELAY)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_CONNECTION_CONTROL_MAX_CONNECTION_DELAY] = 551 ; + ObSysVars[551].base_value_ = "2147483647" ; + ObSysVars[551].alias_ = "OB_SV_CONNECTION_CONTROL_MAX_CONNECTION_DELAY" ; }(); [&] (){ - ObSysVars[552].default_value_ = "0" ; - ObSysVars[552].info_ = "The default mode value to use for the WEEK() function" ; - ObSysVars[552].name_ = "default_week_format" ; + ObSysVars[552].default_value_ = "1000" ; + ObSysVars[552].info_ = "The minmum delay in milliseconds for server response to failed connection attempts, if connection_control_failed_connections_threshold is greater than zero" ; + ObSysVars[552].name_ = "connection_control_min_connection_delay" ; ObSysVars[552].data_type_ = ObIntType ; - ObSysVars[552].min_val_ = "0" ; - ObSysVars[552].max_val_ = "7" ; - 
ObSysVars[552].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[552].id_ = SYS_VAR_DEFAULT_WEEK_FORMAT ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_DEFAULT_WEEK_FORMAT)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_DEFAULT_WEEK_FORMAT] = 552 ; - ObSysVars[552].base_value_ = "0" ; - ObSysVars[552].alias_ = "OB_SV_DEFAULT_WEEK_FORMAT" ; + ObSysVars[552].min_val_ = "1000" ; + ObSysVars[552].max_val_ = "2147483647" ; + ObSysVars[552].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[552].id_ = SYS_VAR_CONNECTION_CONTROL_MIN_CONNECTION_DELAY ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_CONNECTION_CONTROL_MIN_CONNECTION_DELAY)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_CONNECTION_CONTROL_MIN_CONNECTION_DELAY] = 552 ; + ObSysVars[552].base_value_ = "1000" ; + ObSysVars[552].alias_ = "OB_SV_CONNECTION_CONTROL_MIN_CONNECTION_DELAY" ; }(); [&] (){ - ObSysVars[553].default_value_ = "300" ; - ObSysVars[553].info_ = "" ; - ObSysVars[553].name_ = "delayed_insert_timeout" ; + ObSysVars[553].default_value_ = "0" ; + ObSysVars[553].info_ = "The default mode value to use for the WEEK() function" ; + ObSysVars[553].name_ = "default_week_format" ; ObSysVars[553].data_type_ = ObIntType ; - ObSysVars[553].min_val_ = "1" ; - ObSysVars[553].max_val_ = "31536000" ; - ObSysVars[553].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[553].id_ = SYS_VAR_DELAYED_INSERT_TIMEOUT ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_DELAYED_INSERT_TIMEOUT)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_DELAYED_INSERT_TIMEOUT] = 553 ; - ObSysVars[553].base_value_ = "300" ; - ObSysVars[553].alias_ = "OB_SV_DELAYED_INSERT_TIMEOUT" ; + ObSysVars[553].min_val_ = "0" ; + ObSysVars[553].max_val_ = "7" ; + ObSysVars[553].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[553].id_ = SYS_VAR_DEFAULT_WEEK_FORMAT ; + cur_max_var_id = 
MAX(cur_max_var_id, static_cast(SYS_VAR_DEFAULT_WEEK_FORMAT)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_DEFAULT_WEEK_FORMAT] = 553 ; + ObSysVars[553].base_value_ = "0" ; + ObSysVars[553].alias_ = "OB_SV_DEFAULT_WEEK_FORMAT" ; }(); [&] (){ - ObSysVars[554].default_value_ = "1000" ; + ObSysVars[554].default_value_ = "300" ; ObSysVars[554].info_ = "" ; - ObSysVars[554].name_ = "delayed_queue_size" ; - ObSysVars[554].data_type_ = ObUInt64Type ; + ObSysVars[554].name_ = "delayed_insert_timeout" ; + ObSysVars[554].data_type_ = ObIntType ; ObSysVars[554].min_val_ = "1" ; - ObSysVars[554].max_val_ = "18446744073709551615" ; + ObSysVars[554].max_val_ = "31536000" ; ObSysVars[554].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[554].id_ = SYS_VAR_DELAYED_QUEUE_SIZE ; + ObSysVars[554].id_ = SYS_VAR_DELAYED_INSERT_TIMEOUT ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_DELAYED_INSERT_TIMEOUT)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_DELAYED_INSERT_TIMEOUT] = 554 ; + ObSysVars[554].base_value_ = "300" ; + ObSysVars[554].alias_ = "OB_SV_DELAYED_INSERT_TIMEOUT" ; + }(); + + [&] (){ + ObSysVars[555].default_value_ = "1000" ; + ObSysVars[555].info_ = "" ; + ObSysVars[555].name_ = "delayed_queue_size" ; + ObSysVars[555].data_type_ = ObUInt64Type ; + ObSysVars[555].min_val_ = "1" ; + ObSysVars[555].max_val_ = "18446744073709551615" ; + ObSysVars[555].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[555].id_ = SYS_VAR_DELAYED_QUEUE_SIZE ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_DELAYED_QUEUE_SIZE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_DELAYED_QUEUE_SIZE] = 554 ; - ObSysVars[554].base_value_ = "1000" ; - ObSysVars[554].alias_ = "OB_SV_DELAYED_QUEUE_SIZE" ; + ObSysVarsIdToArrayIdx[SYS_VAR_DELAYED_QUEUE_SIZE] = 555 ; + ObSysVars[555].base_value_ = "1000" ; + ObSysVars[555].alias_ = "OB_SV_DELAYED_QUEUE_SIZE" ; }(); [&] (){ - ObSysVars[555].default_value_ = "200" ; - ObSysVars[555].info_ = "This variable indicates the 
number of equality ranges in an equality comparison condition when the optimizer should switch from using index dives to index statistics in estimating the number of qualifying rows" ; - ObSysVars[555].name_ = "eq_range_index_dive_limit" ; - ObSysVars[555].data_type_ = ObIntType ; - ObSysVars[555].min_val_ = "0" ; - ObSysVars[555].max_val_ = "4294967295" ; - ObSysVars[555].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[555].id_ = SYS_VAR_EQ_RANGE_INDEX_DIVE_LIMIT ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_EQ_RANGE_INDEX_DIVE_LIMIT)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_EQ_RANGE_INDEX_DIVE_LIMIT] = 555 ; - ObSysVars[555].base_value_ = "200" ; - ObSysVars[555].alias_ = "OB_SV_EQ_RANGE_INDEX_DIVE_LIMIT" ; - }(); - - [&] (){ - ObSysVars[556].default_value_ = "1" ; - ObSysVars[556].info_ = "Causes InnoDB to automatically recalculate persistent statistics after the data in a table is changed substantially, merely simulates MySQL 5.7" ; - ObSysVars[556].name_ = "innodb_stats_auto_recalc" ; + ObSysVars[556].default_value_ = "200" ; + ObSysVars[556].info_ = "This variable indicates the number of equality ranges in an equality comparison condition when the optimizer should switch from using index dives to index statistics in estimating the number of qualifying rows" ; + ObSysVars[556].name_ = "eq_range_index_dive_limit" ; ObSysVars[556].data_type_ = ObIntType ; - ObSysVars[556].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[556].id_ = SYS_VAR_INNODB_STATS_AUTO_RECALC ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_AUTO_RECALC)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_AUTO_RECALC] = 556 ; - ObSysVars[556].base_value_ = "1" ; - ObSysVars[556].alias_ = "OB_SV_INNODB_STATS_AUTO_RECALC" ; + ObSysVars[556].min_val_ = "0" ; + ObSysVars[556].max_val_ = "4294967295" ; + ObSysVars[556].flags_ = ObSysVarFlag::GLOBAL_SCOPE | 
ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[556].id_ = SYS_VAR_EQ_RANGE_INDEX_DIVE_LIMIT ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_EQ_RANGE_INDEX_DIVE_LIMIT)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_EQ_RANGE_INDEX_DIVE_LIMIT] = 556 ; + ObSysVars[556].base_value_ = "200" ; + ObSysVars[556].alias_ = "OB_SV_EQ_RANGE_INDEX_DIVE_LIMIT" ; }(); [&] (){ - ObSysVars[557].default_value_ = "0" ; - ObSysVars[557].info_ = "When innodb_stats_include_delete_marked is enabled, ANALYZE TABLE considers delete-marked records when recalculating statistics" ; - ObSysVars[557].name_ = "innodb_stats_include_delete_marked" ; + ObSysVars[557].default_value_ = "1" ; + ObSysVars[557].info_ = "Causes InnoDB to automatically recalculate persistent statistics after the data in a table is changed substantially, merely simulates MySQL 5.7" ; + ObSysVars[557].name_ = "innodb_stats_auto_recalc" ; ObSysVars[557].data_type_ = ObIntType ; - ObSysVars[557].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[557].id_ = SYS_VAR_INNODB_STATS_INCLUDE_DELETE_MARKED ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_INCLUDE_DELETE_MARKED)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_INCLUDE_DELETE_MARKED] = 557 ; - ObSysVars[557].base_value_ = "0" ; - ObSysVars[557].alias_ = "OB_SV_INNODB_STATS_INCLUDE_DELETE_MARKED" ; + ObSysVars[557].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[557].id_ = SYS_VAR_INNODB_STATS_AUTO_RECALC ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_AUTO_RECALC)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_AUTO_RECALC] = 557 ; + ObSysVars[557].base_value_ = "1" ; + ObSysVars[557].alias_ = "OB_SV_INNODB_STATS_AUTO_RECALC" ; }(); [&] (){ ObSysVars[558].default_value_ = "0" ; - ObSysVars[558].info_ = "How the server treats NULL values when collecting statistics about the distribution of index values for InnoDB tables" 
; - ObSysVars[558].name_ = "innodb_stats_method" ; + ObSysVars[558].info_ = "When innodb_stats_include_delete_marked is enabled, ANALYZE TABLE considers delete-marked records when recalculating statistics" ; + ObSysVars[558].name_ = "innodb_stats_include_delete_marked" ; ObSysVars[558].data_type_ = ObIntType ; - ObSysVars[558].enum_names_ = "[u'nulls_equal', u'nulls_unequal', u'nulls_ignored']" ; ObSysVars[558].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[558].id_ = SYS_VAR_INNODB_STATS_METHOD ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_METHOD)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_METHOD] = 558 ; + ObSysVars[558].id_ = SYS_VAR_INNODB_STATS_INCLUDE_DELETE_MARKED ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_INCLUDE_DELETE_MARKED)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_INCLUDE_DELETE_MARKED] = 558 ; ObSysVars[558].base_value_ = "0" ; - ObSysVars[558].alias_ = "OB_SV_INNODB_STATS_METHOD" ; + ObSysVars[558].alias_ = "OB_SV_INNODB_STATS_INCLUDE_DELETE_MARKED" ; }(); [&] (){ ObSysVars[559].default_value_ = "0" ; - ObSysVars[559].info_ = "When innodb_stats_on_metadata is enabled, InnoDB updates non-persistent statistics when metadata statements such as SHOW TABLE STATUS or when accessing the Information Schema TABLES or STATISTICS tables" ; - ObSysVars[559].name_ = "innodb_stats_on_metadata" ; + ObSysVars[559].info_ = "How the server treats NULL values when collecting statistics about the distribution of index values for InnoDB tables" ; + ObSysVars[559].name_ = "innodb_stats_method" ; ObSysVars[559].data_type_ = ObIntType ; + ObSysVars[559].enum_names_ = "[u'nulls_equal', u'nulls_unequal', u'nulls_ignored']" ; ObSysVars[559].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[559].id_ = SYS_VAR_INNODB_STATS_ON_METADATA ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_ON_METADATA)) ; - 
ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_ON_METADATA] = 559 ; + ObSysVars[559].id_ = SYS_VAR_INNODB_STATS_METHOD ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_METHOD)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_METHOD] = 559 ; ObSysVars[559].base_value_ = "0" ; - ObSysVars[559].alias_ = "OB_SV_INNODB_STATS_ON_METADATA" ; + ObSysVars[559].alias_ = "OB_SV_INNODB_STATS_METHOD" ; }(); [&] (){ - ObSysVars[560].default_value_ = "" ; - ObSysVars[560].info_ = "The session value of this variable specifies the client version token list and indicates the tokens that the client session requires the server version token list to have, merely simulates MySQL 5.7" ; - ObSysVars[560].name_ = "version_tokens_session" ; - ObSysVars[560].data_type_ = ObVarcharType ; - ObSysVars[560].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[560].id_ = SYS_VAR_VERSION_TOKENS_SESSION ; + ObSysVars[560].default_value_ = "0" ; + ObSysVars[560].info_ = "When innodb_stats_on_metadata is enabled, InnoDB updates non-persistent statistics when metadata statements such as SHOW TABLE STATUS or when accessing the Information Schema TABLES or STATISTICS tables" ; + ObSysVars[560].name_ = "innodb_stats_on_metadata" ; + ObSysVars[560].data_type_ = ObIntType ; + ObSysVars[560].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[560].id_ = SYS_VAR_INNODB_STATS_ON_METADATA ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_ON_METADATA)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_ON_METADATA] = 560 ; + ObSysVars[560].base_value_ = "0" ; + ObSysVars[560].alias_ = "OB_SV_INNODB_STATS_ON_METADATA" ; + }(); + + [&] (){ + ObSysVars[561].default_value_ = "" ; + ObSysVars[561].info_ = "The session value of this variable specifies the client version token list and indicates the tokens that the client session requires the server version token list to have, merely simulates MySQL 
5.7" ; + ObSysVars[561].name_ = "version_tokens_session" ; + ObSysVars[561].data_type_ = ObVarcharType ; + ObSysVars[561].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[561].id_ = SYS_VAR_VERSION_TOKENS_SESSION ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_VERSION_TOKENS_SESSION)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_VERSION_TOKENS_SESSION] = 560 ; - ObSysVars[560].base_value_ = "" ; - ObSysVars[560].alias_ = "OB_SV_VERSION_TOKENS_SESSION" ; + ObSysVarsIdToArrayIdx[SYS_VAR_VERSION_TOKENS_SESSION] = 561 ; + ObSysVars[561].base_value_ = "" ; + ObSysVars[561].alias_ = "OB_SV_VERSION_TOKENS_SESSION" ; }(); [&] (){ - ObSysVars[561].default_value_ = "20" ; - ObSysVars[561].info_ = "The number of index pages to sample when estimating cardinality and other statistics for an indexed column, such as those calculated by ANALYZE TABLE" ; - ObSysVars[561].name_ = "innodb_stats_persistent_sample_pages" ; - ObSysVars[561].data_type_ = ObUInt64Type ; - ObSysVars[561].min_val_ = "1" ; - ObSysVars[561].max_val_ = "18446744073709551615" ; - ObSysVars[561].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[561].id_ = SYS_VAR_INNODB_STATS_PERSISTENT_SAMPLE_PAGES ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_PERSISTENT_SAMPLE_PAGES)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_PERSISTENT_SAMPLE_PAGES] = 561 ; - ObSysVars[561].base_value_ = "20" ; - ObSysVars[561].alias_ = "OB_SV_INNODB_STATS_PERSISTENT_SAMPLE_PAGES" ; - }(); - - [&] (){ - ObSysVars[562].default_value_ = "8" ; + ObSysVars[562].default_value_ = "20" ; ObSysVars[562].info_ = "The number of index pages to sample when estimating cardinality and other statistics for an indexed column, such as those calculated by ANALYZE TABLE" ; - ObSysVars[562].name_ = "innodb_stats_sample_pages" ; + ObSysVars[562].name_ = "innodb_stats_persistent_sample_pages" ; ObSysVars[562].data_type_ = ObUInt64Type ; 
ObSysVars[562].min_val_ = "1" ; ObSysVars[562].max_val_ = "18446744073709551615" ; ObSysVars[562].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[562].id_ = SYS_VAR_INNODB_STATS_SAMPLE_PAGES ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_SAMPLE_PAGES)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_SAMPLE_PAGES] = 562 ; - ObSysVars[562].base_value_ = "8" ; - ObSysVars[562].alias_ = "OB_SV_INNODB_STATS_SAMPLE_PAGES" ; + ObSysVars[562].id_ = SYS_VAR_INNODB_STATS_PERSISTENT_SAMPLE_PAGES ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_PERSISTENT_SAMPLE_PAGES)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_PERSISTENT_SAMPLE_PAGES] = 562 ; + ObSysVars[562].base_value_ = "20" ; + ObSysVars[562].alias_ = "OB_SV_INNODB_STATS_PERSISTENT_SAMPLE_PAGES" ; }(); [&] (){ ObSysVars[563].default_value_ = "8" ; ObSysVars[563].info_ = "The number of index pages to sample when estimating cardinality and other statistics for an indexed column, such as those calculated by ANALYZE TABLE" ; - ObSysVars[563].name_ = "innodb_stats_transient_sample_pages" ; + ObSysVars[563].name_ = "innodb_stats_sample_pages" ; ObSysVars[563].data_type_ = ObUInt64Type ; ObSysVars[563].min_val_ = "1" ; ObSysVars[563].max_val_ = "18446744073709551615" ; ObSysVars[563].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[563].id_ = SYS_VAR_INNODB_STATS_TRANSIENT_SAMPLE_PAGES ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_TRANSIENT_SAMPLE_PAGES)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_TRANSIENT_SAMPLE_PAGES] = 563 ; + ObSysVars[563].id_ = SYS_VAR_INNODB_STATS_SAMPLE_PAGES ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_SAMPLE_PAGES)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_SAMPLE_PAGES] = 563 ; ObSysVars[563].base_value_ = "8" ; - ObSysVars[563].alias_ = "OB_SV_INNODB_STATS_TRANSIENT_SAMPLE_PAGES" ; + ObSysVars[563].alias_ = 
"OB_SV_INNODB_STATS_SAMPLE_PAGES" ; }(); [&] (){ - ObSysVars[564].default_value_ = "" ; - ObSysVars[564].info_ = "The customer master key (CMK) ID obtained from the AWS KMS server and used by the keyring_aws plugin" ; - ObSysVars[564].name_ = "keyring_aws_cmk_id" ; - ObSysVars[564].data_type_ = ObVarcharType ; + ObSysVars[564].default_value_ = "8" ; + ObSysVars[564].info_ = "The number of index pages to sample when estimating cardinality and other statistics for an indexed column, such as those calculated by ANALYZE TABLE" ; + ObSysVars[564].name_ = "innodb_stats_transient_sample_pages" ; + ObSysVars[564].data_type_ = ObUInt64Type ; + ObSysVars[564].min_val_ = "1" ; + ObSysVars[564].max_val_ = "18446744073709551615" ; ObSysVars[564].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[564].id_ = SYS_VAR_KEYRING_AWS_CMK_ID ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_AWS_CMK_ID)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_AWS_CMK_ID] = 564 ; - ObSysVars[564].base_value_ = "" ; - ObSysVars[564].alias_ = "OB_SV_KEYRING_AWS_CMK_ID" ; + ObSysVars[564].id_ = SYS_VAR_INNODB_STATS_TRANSIENT_SAMPLE_PAGES ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_INNODB_STATS_TRANSIENT_SAMPLE_PAGES)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_INNODB_STATS_TRANSIENT_SAMPLE_PAGES] = 564 ; + ObSysVars[564].base_value_ = "8" ; + ObSysVars[564].alias_ = "OB_SV_INNODB_STATS_TRANSIENT_SAMPLE_PAGES" ; }(); [&] (){ - ObSysVars[565].default_value_ = "19" ; - ObSysVars[565].info_ = "The AWS region for the keyring_aws plugin. 
This variable is unavailable unless that plugin is installed" ; - ObSysVars[565].name_ = "keyring_aws_region" ; - ObSysVars[565].data_type_ = ObIntType ; - ObSysVars[565].enum_names_ = "[u'af-south-1', u'ap-east-1', u'ap-northeast-1', u'ap-northeast-2', u'ap-northeast-3', u'ap-south-1', u'ap-southeast-1', u'ap-southeast-2', u'ca-central-1', u'cn-north-1', u'cn-northwest-1', u'eu-central-1', u'eu-north-1', u'eu-south-1', u'eu-west-1', u'eu-west-2', u'eu-west-3', u'me-south-1', u'sa-east-1', u'us-east-1', u'us-east-2', u'us-gov-east-1', u'us-iso-east-1', u'us-iso-west-1', u'us-isob-east-1', u'us-west-1', u'us-west-2']" ; + ObSysVars[565].default_value_ = "" ; + ObSysVars[565].info_ = "The customer master key (CMK) ID obtained from the AWS KMS server and used by the keyring_aws plugin" ; + ObSysVars[565].name_ = "keyring_aws_cmk_id" ; + ObSysVars[565].data_type_ = ObVarcharType ; ObSysVars[565].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[565].id_ = SYS_VAR_KEYRING_AWS_REGION ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_AWS_REGION)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_AWS_REGION] = 565 ; - ObSysVars[565].base_value_ = "19" ; - ObSysVars[565].alias_ = "OB_SV_KEYRING_AWS_REGION" ; + ObSysVars[565].id_ = SYS_VAR_KEYRING_AWS_CMK_ID ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_AWS_CMK_ID)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_AWS_CMK_ID] = 565 ; + ObSysVars[565].base_value_ = "" ; + ObSysVars[565].alias_ = "OB_SV_KEYRING_AWS_CMK_ID" ; }(); [&] (){ - ObSysVars[566].default_value_ = "" ; - ObSysVars[566].info_ = "The path name of the data file used for secure data storage by the keyring_encrypted_file plugin" ; - ObSysVars[566].name_ = "keyring_encrypted_file_data" ; - ObSysVars[566].data_type_ = ObVarcharType ; + ObSysVars[566].default_value_ = "19" ; + ObSysVars[566].info_ = "The AWS region for the keyring_aws plugin. 
This variable is unavailable unless that plugin is installed" ; + ObSysVars[566].name_ = "keyring_aws_region" ; + ObSysVars[566].data_type_ = ObIntType ; + ObSysVars[566].enum_names_ = "[u'af-south-1', u'ap-east-1', u'ap-northeast-1', u'ap-northeast-2', u'ap-northeast-3', u'ap-south-1', u'ap-southeast-1', u'ap-southeast-2', u'ca-central-1', u'cn-north-1', u'cn-northwest-1', u'eu-central-1', u'eu-north-1', u'eu-south-1', u'eu-west-1', u'eu-west-2', u'eu-west-3', u'me-south-1', u'sa-east-1', u'us-east-1', u'us-east-2', u'us-gov-east-1', u'us-iso-east-1', u'us-iso-west-1', u'us-isob-east-1', u'us-west-1', u'us-west-2']" ; ObSysVars[566].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[566].id_ = SYS_VAR_KEYRING_ENCRYPTED_FILE_DATA ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_ENCRYPTED_FILE_DATA)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_ENCRYPTED_FILE_DATA] = 566 ; - ObSysVars[566].base_value_ = "" ; - ObSysVars[566].alias_ = "OB_SV_KEYRING_ENCRYPTED_FILE_DATA" ; + ObSysVars[566].id_ = SYS_VAR_KEYRING_AWS_REGION ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_AWS_REGION)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_AWS_REGION] = 566 ; + ObSysVars[566].base_value_ = "19" ; + ObSysVars[566].alias_ = "OB_SV_KEYRING_AWS_REGION" ; }(); [&] (){ ObSysVars[567].default_value_ = "" ; - ObSysVars[567].info_ = "The password used by the keyring_encrypted_file pluginn" ; - ObSysVars[567].name_ = "keyring_encrypted_file_password" ; + ObSysVars[567].info_ = "The path name of the data file used for secure data storage by the keyring_encrypted_file plugin" ; + ObSysVars[567].name_ = "keyring_encrypted_file_data" ; ObSysVars[567].data_type_ = ObVarcharType ; ObSysVars[567].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[567].id_ = SYS_VAR_KEYRING_ENCRYPTED_FILE_PASSWORD ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_ENCRYPTED_FILE_PASSWORD)) ; - 
ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_ENCRYPTED_FILE_PASSWORD] = 567 ; + ObSysVars[567].id_ = SYS_VAR_KEYRING_ENCRYPTED_FILE_DATA ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_ENCRYPTED_FILE_DATA)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_ENCRYPTED_FILE_DATA] = 567 ; ObSysVars[567].base_value_ = "" ; - ObSysVars[567].alias_ = "OB_SV_KEYRING_ENCRYPTED_FILE_PASSWORD" ; + ObSysVars[567].alias_ = "OB_SV_KEYRING_ENCRYPTED_FILE_DATA" ; }(); [&] (){ ObSysVars[568].default_value_ = "" ; - ObSysVars[568].info_ = "The path name of the data file used for secure data storage by the keyring_file plugin" ; - ObSysVars[568].name_ = "keyring_file_data" ; + ObSysVars[568].info_ = "The password used by the keyring_encrypted_file pluginn" ; + ObSysVars[568].name_ = "keyring_encrypted_file_password" ; ObSysVars[568].data_type_ = ObVarcharType ; ObSysVars[568].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[568].id_ = SYS_VAR_KEYRING_FILE_DATA ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_FILE_DATA)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_FILE_DATA] = 568 ; + ObSysVars[568].id_ = SYS_VAR_KEYRING_ENCRYPTED_FILE_PASSWORD ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_ENCRYPTED_FILE_PASSWORD)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_ENCRYPTED_FILE_PASSWORD] = 568 ; ObSysVars[568].base_value_ = "" ; - ObSysVars[568].alias_ = "OB_SV_KEYRING_FILE_DATA" ; + ObSysVars[568].alias_ = "OB_SV_KEYRING_ENCRYPTED_FILE_PASSWORD" ; }(); [&] (){ ObSysVars[569].default_value_ = "" ; - ObSysVars[569].info_ = "The path name of the directory that stores configuration information used by the keyring_okv plugin" ; - ObSysVars[569].name_ = "keyring_okv_conf_dir" ; + ObSysVars[569].info_ = "The path name of the data file used for secure data storage by the keyring_file plugin" ; + ObSysVars[569].name_ = "keyring_file_data" ; ObSysVars[569].data_type_ = ObVarcharType ; ObSysVars[569].flags_ = 
ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[569].id_ = SYS_VAR_KEYRING_OKV_CONF_DIR ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_OKV_CONF_DIR)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_OKV_CONF_DIR] = 569 ; + ObSysVars[569].id_ = SYS_VAR_KEYRING_FILE_DATA ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_FILE_DATA)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_FILE_DATA] = 569 ; ObSysVars[569].base_value_ = "" ; - ObSysVars[569].alias_ = "OB_SV_KEYRING_OKV_CONF_DIR" ; + ObSysVars[569].alias_ = "OB_SV_KEYRING_FILE_DATA" ; }(); [&] (){ - ObSysVars[570].default_value_ = "1" ; - ObSysVars[570].info_ = "Whether keyring operations are enabled. This variable is used during key migration operations" ; - ObSysVars[570].name_ = "keyring_operations" ; - ObSysVars[570].data_type_ = ObIntType ; + ObSysVars[570].default_value_ = "" ; + ObSysVars[570].info_ = "The path name of the directory that stores configuration information used by the keyring_okv plugin" ; + ObSysVars[570].name_ = "keyring_okv_conf_dir" ; + ObSysVars[570].data_type_ = ObVarcharType ; ObSysVars[570].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[570].id_ = SYS_VAR_KEYRING_OPERATIONS ; + ObSysVars[570].id_ = SYS_VAR_KEYRING_OKV_CONF_DIR ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_OKV_CONF_DIR)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_OKV_CONF_DIR] = 570 ; + ObSysVars[570].base_value_ = "" ; + ObSysVars[570].alias_ = "OB_SV_KEYRING_OKV_CONF_DIR" ; + }(); + + [&] (){ + ObSysVars[571].default_value_ = "1" ; + ObSysVars[571].info_ = "Whether keyring operations are enabled. 
This variable is used during key migration operations" ; + ObSysVars[571].name_ = "keyring_operations" ; + ObSysVars[571].data_type_ = ObIntType ; + ObSysVars[571].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[571].id_ = SYS_VAR_KEYRING_OPERATIONS ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_KEYRING_OPERATIONS)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_OPERATIONS] = 570 ; - ObSysVars[570].base_value_ = "1" ; - ObSysVars[570].alias_ = "OB_SV_KEYRING_OPERATIONS" ; + ObSysVarsIdToArrayIdx[SYS_VAR_KEYRING_OPERATIONS] = 571 ; + ObSysVars[571].base_value_ = "1" ; + ObSysVars[571].alias_ = "OB_SV_KEYRING_OPERATIONS" ; }(); [&] (){ - ObSysVars[571].default_value_ = "" ; - ObSysVars[571].info_ = "enables control over optimizer behavior" ; - ObSysVars[571].name_ = "optimizer_switch" ; - ObSysVars[571].data_type_ = ObVarcharType ; - ObSysVars[571].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[571].id_ = SYS_VAR_OPTIMIZER_SWITCH ; + ObSysVars[572].default_value_ = "" ; + ObSysVars[572].info_ = "enables control over optimizer behavior" ; + ObSysVars[572].name_ = "optimizer_switch" ; + ObSysVars[572].data_type_ = ObVarcharType ; + ObSysVars[572].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[572].id_ = SYS_VAR_OPTIMIZER_SWITCH ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_SWITCH)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_SWITCH] = 571 ; - ObSysVars[571].base_value_ = "" ; - ObSysVars[571].alias_ = "OB_SV_OPTIMIZER_SWITCH" ; + ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_SWITCH] = 572 ; + ObSysVars[572].base_value_ = "" ; + ObSysVars[572].alias_ = "OB_SV_OPTIMIZER_SWITCH" ; }(); [&] (){ - ObSysVars[572].default_value_ = "100" ; - ObSysVars[572].info_ = "After max_connect_errors successive connection requests from a host are interrupted without a successful connection, the server blocks that host 
from further connections" ; - ObSysVars[572].name_ = "max_connect_errors" ; - ObSysVars[572].data_type_ = ObUInt64Type ; - ObSysVars[572].min_val_ = "1" ; - ObSysVars[572].max_val_ = "18446744073709551615" ; - ObSysVars[572].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[572].id_ = SYS_VAR_MAX_CONNECT_ERRORS ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_MAX_CONNECT_ERRORS)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_MAX_CONNECT_ERRORS] = 572 ; - ObSysVars[572].base_value_ = "100" ; - ObSysVars[572].alias_ = "OB_SV_MAX_CONNECT_ERRORS" ; - }(); - - [&] (){ - ObSysVars[573].default_value_ = "0" ; - ObSysVars[573].info_ = "Whether MySQL Enterprise Firewall is enabled (the default) or disabled" ; - ObSysVars[573].name_ = "mysql_firewall_mode" ; - ObSysVars[573].data_type_ = ObIntType ; + ObSysVars[573].default_value_ = "100" ; + ObSysVars[573].info_ = "After max_connect_errors successive connection requests from a host are interrupted without a successful connection, the server blocks that host from further connections" ; + ObSysVars[573].name_ = "max_connect_errors" ; + ObSysVars[573].data_type_ = ObUInt64Type ; + ObSysVars[573].min_val_ = "1" ; + ObSysVars[573].max_val_ = "18446744073709551615" ; ObSysVars[573].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[573].id_ = SYS_VAR_MYSQL_FIREWALL_MODE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_MYSQL_FIREWALL_MODE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_MYSQL_FIREWALL_MODE] = 573 ; - ObSysVars[573].base_value_ = "0" ; - ObSysVars[573].alias_ = "OB_SV_MYSQL_FIREWALL_MODE" ; + ObSysVars[573].id_ = SYS_VAR_MAX_CONNECT_ERRORS ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_MAX_CONNECT_ERRORS)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_MAX_CONNECT_ERRORS] = 573 ; + ObSysVars[573].base_value_ = "100" ; + ObSysVars[573].alias_ = "OB_SV_MAX_CONNECT_ERRORS" ; }(); [&] (){ ObSysVars[574].default_value_ = "0" ; - ObSysVars[574].info_ = "Whether the 
MySQL Enterprise Firewall trace is enabled or disabled (the default)" ; - ObSysVars[574].name_ = "mysql_firewall_trace" ; + ObSysVars[574].info_ = "Whether MySQL Enterprise Firewall is enabled (the default) or disabled" ; + ObSysVars[574].name_ = "mysql_firewall_mode" ; ObSysVars[574].data_type_ = ObIntType ; ObSysVars[574].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[574].id_ = SYS_VAR_MYSQL_FIREWALL_TRACE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_MYSQL_FIREWALL_TRACE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_MYSQL_FIREWALL_TRACE] = 574 ; + ObSysVars[574].id_ = SYS_VAR_MYSQL_FIREWALL_MODE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_MYSQL_FIREWALL_MODE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_MYSQL_FIREWALL_MODE] = 574 ; ObSysVars[574].base_value_ = "0" ; - ObSysVars[574].alias_ = "OB_SV_MYSQL_FIREWALL_TRACE" ; + ObSysVars[574].alias_ = "OB_SV_MYSQL_FIREWALL_MODE" ; }(); [&] (){ ObSysVars[575].default_value_ = "0" ; - ObSysVars[575].info_ = "This variable controls whether the mysql_native_password built-in authentication plugin supports proxy users" ; - ObSysVars[575].name_ = "mysql_native_password_proxy_users" ; + ObSysVars[575].info_ = "Whether the MySQL Enterprise Firewall trace is enabled or disabled (the default)" ; + ObSysVars[575].name_ = "mysql_firewall_trace" ; ObSysVars[575].data_type_ = ObIntType ; ObSysVars[575].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[575].id_ = SYS_VAR_MYSQL_NATIVE_PASSWORD_PROXY_USERS ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_MYSQL_NATIVE_PASSWORD_PROXY_USERS)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_MYSQL_NATIVE_PASSWORD_PROXY_USERS] = 575 ; + ObSysVars[575].id_ = SYS_VAR_MYSQL_FIREWALL_TRACE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_MYSQL_FIREWALL_TRACE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_MYSQL_FIREWALL_TRACE] = 575 ; ObSysVars[575].base_value_ = "0" ; - ObSysVars[575].alias_ = 
"OB_SV_MYSQL_NATIVE_PASSWORD_PROXY_USERS" ; + ObSysVars[575].alias_ = "OB_SV_MYSQL_FIREWALL_TRACE" ; }(); [&] (){ - ObSysVars[576].default_value_ = "10" ; - ObSysVars[576].info_ = "If a read or write on a communication port is interrupted, retry this many times before giving up. This value should be set quite high on FreeBSD because internal interrupts are sent to all threads" ; - ObSysVars[576].name_ = "net_retry_count" ; - ObSysVars[576].data_type_ = ObUInt64Type ; + ObSysVars[576].default_value_ = "0" ; + ObSysVars[576].info_ = "This variable controls whether the mysql_native_password built-in authentication plugin supports proxy users" ; + ObSysVars[576].name_ = "mysql_native_password_proxy_users" ; + ObSysVars[576].data_type_ = ObIntType ; ObSysVars[576].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[576].id_ = SYS_VAR_NET_RETRY_COUNT ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NET_RETRY_COUNT)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NET_RETRY_COUNT] = 576 ; - ObSysVars[576].base_value_ = "10" ; - ObSysVars[576].alias_ = "OB_SV_NET_RETRY_COUNT" ; + ObSysVars[576].id_ = SYS_VAR_MYSQL_NATIVE_PASSWORD_PROXY_USERS ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_MYSQL_NATIVE_PASSWORD_PROXY_USERS)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_MYSQL_NATIVE_PASSWORD_PROXY_USERS] = 576 ; + ObSysVars[576].base_value_ = "0" ; + ObSysVars[576].alias_ = "OB_SV_MYSQL_NATIVE_PASSWORD_PROXY_USERS" ; }(); [&] (){ - ObSysVars[577].default_value_ = "0" ; - ObSysVars[577].info_ = "This variable was used in MySQL 4.0 to turn on some 4.1 behaviors, and is retained for backward compatibility. 
Its value is always OFF" ; - ObSysVars[577].name_ = "new" ; - ObSysVars[577].data_type_ = ObIntType ; - ObSysVars[577].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[577].id_ = SYS_VAR_NEW ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NEW)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NEW] = 577 ; - ObSysVars[577].base_value_ = "0" ; - ObSysVars[577].alias_ = "OB_SV_NEW" ; + ObSysVars[577].default_value_ = "10" ; + ObSysVars[577].info_ = "If a read or write on a communication port is interrupted, retry this many times before giving up. This value should be set quite high on FreeBSD because internal interrupts are sent to all threads" ; + ObSysVars[577].name_ = "net_retry_count" ; + ObSysVars[577].data_type_ = ObUInt64Type ; + ObSysVars[577].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[577].id_ = SYS_VAR_NET_RETRY_COUNT ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NET_RETRY_COUNT)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NET_RETRY_COUNT] = 577 ; + ObSysVars[577].base_value_ = "10" ; + ObSysVars[577].alias_ = "OB_SV_NET_RETRY_COUNT" ; }(); [&] (){ ObSysVars[578].default_value_ = "0" ; - ObSysVars[578].info_ = "This variable controls the password hashing method used by the PASSWORD() function. It also influences password hashing performed by CREATE USER and GRANT statements that specify a password using an IDENTIFIED BY clause" ; - ObSysVars[578].name_ = "old_passwords" ; + ObSysVars[578].info_ = "This variable was used in MySQL 4.0 to turn on some 4.1 behaviors, and is retained for backward compatibility. 
Its value is always OFF" ; + ObSysVars[578].name_ = "new" ; ObSysVars[578].data_type_ = ObIntType ; - ObSysVars[578].enum_names_ = "[u'0', u'1', u'2']" ; ObSysVars[578].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[578].id_ = SYS_VAR_OLD_PASSWORDS ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OLD_PASSWORDS)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_OLD_PASSWORDS] = 578 ; + ObSysVars[578].id_ = SYS_VAR_NEW ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NEW)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NEW] = 578 ; ObSysVars[578].base_value_ = "0" ; - ObSysVars[578].alias_ = "OB_SV_OLD_PASSWORDS" ; + ObSysVars[578].alias_ = "OB_SV_NEW" ; }(); [&] (){ - ObSysVars[579].default_value_ = "1" ; - ObSysVars[579].info_ = "Controls the heuristics applied during query optimization to prune less-promising partial plans from the optimizer search space" ; - ObSysVars[579].name_ = "optimizer_prune_level" ; + ObSysVars[579].default_value_ = "0" ; + ObSysVars[579].info_ = "This variable controls the password hashing method used by the PASSWORD() function. 
It also influences password hashing performed by CREATE USER and GRANT statements that specify a password using an IDENTIFIED BY clause" ; + ObSysVars[579].name_ = "old_passwords" ; ObSysVars[579].data_type_ = ObIntType ; - ObSysVars[579].min_val_ = "0" ; - ObSysVars[579].max_val_ = "1" ; + ObSysVars[579].enum_names_ = "[u'0', u'1', u'2']" ; ObSysVars[579].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[579].id_ = SYS_VAR_OPTIMIZER_PRUNE_LEVEL ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_PRUNE_LEVEL)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_PRUNE_LEVEL] = 579 ; - ObSysVars[579].base_value_ = "1" ; - ObSysVars[579].alias_ = "OB_SV_OPTIMIZER_PRUNE_LEVEL" ; + ObSysVars[579].id_ = SYS_VAR_OLD_PASSWORDS ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OLD_PASSWORDS)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_OLD_PASSWORDS] = 579 ; + ObSysVars[579].base_value_ = "0" ; + ObSysVars[579].alias_ = "OB_SV_OLD_PASSWORDS" ; }(); [&] (){ - ObSysVars[580].default_value_ = "62" ; - ObSysVars[580].info_ = "The maximum depth of search performed by the query optimizer" ; - ObSysVars[580].name_ = "optimizer_search_depth" ; + ObSysVars[580].default_value_ = "1" ; + ObSysVars[580].info_ = "Controls the heuristics applied during query optimization to prune less-promising partial plans from the optimizer search space" ; + ObSysVars[580].name_ = "optimizer_prune_level" ; ObSysVars[580].data_type_ = ObIntType ; ObSysVars[580].min_val_ = "0" ; - ObSysVars[580].max_val_ = "62" ; + ObSysVars[580].max_val_ = "1" ; ObSysVars[580].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[580].id_ = SYS_VAR_OPTIMIZER_SEARCH_DEPTH ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_SEARCH_DEPTH)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_SEARCH_DEPTH] = 580 ; - ObSysVars[580].base_value_ = "62" ; - ObSysVars[580].alias_ = 
"OB_SV_OPTIMIZER_SEARCH_DEPTH" ; + ObSysVars[580].id_ = SYS_VAR_OPTIMIZER_PRUNE_LEVEL ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_PRUNE_LEVEL)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_PRUNE_LEVEL] = 580 ; + ObSysVars[580].base_value_ = "1" ; + ObSysVars[580].alias_ = "OB_SV_OPTIMIZER_PRUNE_LEVEL" ; }(); [&] (){ - ObSysVars[581].default_value_ = "" ; - ObSysVars[581].info_ = "This variable controls optimizer tracing" ; - ObSysVars[581].name_ = "optimizer_trace" ; - ObSysVars[581].data_type_ = ObVarcharType ; + ObSysVars[581].default_value_ = "62" ; + ObSysVars[581].info_ = "The maximum depth of search performed by the query optimizer" ; + ObSysVars[581].name_ = "optimizer_search_depth" ; + ObSysVars[581].data_type_ = ObIntType ; + ObSysVars[581].min_val_ = "0" ; + ObSysVars[581].max_val_ = "62" ; ObSysVars[581].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[581].id_ = SYS_VAR_OPTIMIZER_TRACE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_TRACE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_TRACE] = 581 ; - ObSysVars[581].base_value_ = "" ; - ObSysVars[581].alias_ = "OB_SV_OPTIMIZER_TRACE" ; + ObSysVars[581].id_ = SYS_VAR_OPTIMIZER_SEARCH_DEPTH ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_SEARCH_DEPTH)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_SEARCH_DEPTH] = 581 ; + ObSysVars[581].base_value_ = "62" ; + ObSysVars[581].alias_ = "OB_SV_OPTIMIZER_SEARCH_DEPTH" ; }(); [&] (){ ObSysVars[582].default_value_ = "" ; - ObSysVars[582].info_ = "This variable enables or disables selected optimizer tracing features" ; - ObSysVars[582].name_ = "optimizer_trace_features" ; + ObSysVars[582].info_ = "This variable controls optimizer tracing" ; + ObSysVars[582].name_ = "optimizer_trace" ; ObSysVars[582].data_type_ = ObVarcharType ; ObSysVars[582].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - 
ObSysVars[582].id_ = SYS_VAR_OPTIMIZER_TRACE_FEATURES ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_TRACE_FEATURES)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_TRACE_FEATURES] = 582 ; + ObSysVars[582].id_ = SYS_VAR_OPTIMIZER_TRACE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_TRACE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_TRACE] = 582 ; ObSysVars[582].base_value_ = "" ; - ObSysVars[582].alias_ = "OB_SV_OPTIMIZER_TRACE_FEATURES" ; + ObSysVars[582].alias_ = "OB_SV_OPTIMIZER_TRACE" ; }(); [&] (){ - ObSysVars[583].default_value_ = "1" ; - ObSysVars[583].info_ = "The maximum number of optimizer traces to display" ; - ObSysVars[583].name_ = "optimizer_trace_limit" ; - ObSysVars[583].data_type_ = ObIntType ; - ObSysVars[583].min_val_ = "0" ; - ObSysVars[583].max_val_ = "2147483647" ; + ObSysVars[583].default_value_ = "" ; + ObSysVars[583].info_ = "This variable enables or disables selected optimizer tracing features" ; + ObSysVars[583].name_ = "optimizer_trace_features" ; + ObSysVars[583].data_type_ = ObVarcharType ; ObSysVars[583].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[583].id_ = SYS_VAR_OPTIMIZER_TRACE_LIMIT ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_TRACE_LIMIT)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_TRACE_LIMIT] = 583 ; - ObSysVars[583].base_value_ = "1" ; - ObSysVars[583].alias_ = "OB_SV_OPTIMIZER_TRACE_LIMIT" ; + ObSysVars[583].id_ = SYS_VAR_OPTIMIZER_TRACE_FEATURES ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_TRACE_FEATURES)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_TRACE_FEATURES] = 583 ; + ObSysVars[583].base_value_ = "" ; + ObSysVars[583].alias_ = "OB_SV_OPTIMIZER_TRACE_FEATURES" ; }(); [&] (){ - ObSysVars[584].default_value_ = "16384" ; - ObSysVars[584].info_ = "The maximum cumulative size of stored optimizer traces" ; - ObSysVars[584].name_ = "optimizer_trace_max_mem_size" ; - 
ObSysVars[584].data_type_ = ObUInt64Type ; + ObSysVars[584].default_value_ = "1" ; + ObSysVars[584].info_ = "The maximum number of optimizer traces to display" ; + ObSysVars[584].name_ = "optimizer_trace_limit" ; + ObSysVars[584].data_type_ = ObIntType ; ObSysVars[584].min_val_ = "0" ; - ObSysVars[584].max_val_ = "4294967295" ; + ObSysVars[584].max_val_ = "2147483647" ; ObSysVars[584].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[584].id_ = SYS_VAR_OPTIMIZER_TRACE_MAX_MEM_SIZE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_TRACE_MAX_MEM_SIZE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_TRACE_MAX_MEM_SIZE] = 584 ; - ObSysVars[584].base_value_ = "16384" ; - ObSysVars[584].alias_ = "OB_SV_OPTIMIZER_TRACE_MAX_MEM_SIZE" ; + ObSysVars[584].id_ = SYS_VAR_OPTIMIZER_TRACE_LIMIT ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_TRACE_LIMIT)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_TRACE_LIMIT] = 584 ; + ObSysVars[584].base_value_ = "1" ; + ObSysVars[584].alias_ = "OB_SV_OPTIMIZER_TRACE_LIMIT" ; }(); [&] (){ - ObSysVars[585].default_value_ = "-1" ; - ObSysVars[585].info_ = "The offset of optimizer traces to display" ; - ObSysVars[585].name_ = "optimizer_trace_offset" ; - ObSysVars[585].data_type_ = ObIntType ; - ObSysVars[585].min_val_ = "-2147483647" ; - ObSysVars[585].max_val_ = "2147483647" ; + ObSysVars[585].default_value_ = "16384" ; + ObSysVars[585].info_ = "The maximum cumulative size of stored optimizer traces" ; + ObSysVars[585].name_ = "optimizer_trace_max_mem_size" ; + ObSysVars[585].data_type_ = ObUInt64Type ; + ObSysVars[585].min_val_ = "0" ; + ObSysVars[585].max_val_ = "4294967295" ; ObSysVars[585].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[585].id_ = SYS_VAR_OPTIMIZER_TRACE_OFFSET ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_TRACE_OFFSET)) ; - 
ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_TRACE_OFFSET] = 585 ; - ObSysVars[585].base_value_ = "-1" ; - ObSysVars[585].alias_ = "OB_SV_OPTIMIZER_TRACE_OFFSET" ; + ObSysVars[585].id_ = SYS_VAR_OPTIMIZER_TRACE_MAX_MEM_SIZE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_TRACE_MAX_MEM_SIZE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_TRACE_MAX_MEM_SIZE] = 585 ; + ObSysVars[585].base_value_ = "16384" ; + ObSysVars[585].alias_ = "OB_SV_OPTIMIZER_TRACE_MAX_MEM_SIZE" ; }(); [&] (){ - ObSysVars[586].default_value_ = "18446744073709551615" ; - ObSysVars[586].info_ = "The maximum amount of memory available to the parser" ; - ObSysVars[586].name_ = "parser_max_mem_size" ; - ObSysVars[586].data_type_ = ObUInt64Type ; - ObSysVars[586].min_val_ = "10000000" ; - ObSysVars[586].max_val_ = "18446744073709551615" ; + ObSysVars[586].default_value_ = "-1" ; + ObSysVars[586].info_ = "The offset of optimizer traces to display" ; + ObSysVars[586].name_ = "optimizer_trace_offset" ; + ObSysVars[586].data_type_ = ObIntType ; + ObSysVars[586].min_val_ = "-2147483647" ; + ObSysVars[586].max_val_ = "2147483647" ; ObSysVars[586].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[586].id_ = SYS_VAR_PARSER_MAX_MEM_SIZE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_PARSER_MAX_MEM_SIZE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_PARSER_MAX_MEM_SIZE] = 586 ; - ObSysVars[586].base_value_ = "18446744073709551615" ; - ObSysVars[586].alias_ = "OB_SV_PARSER_MAX_MEM_SIZE" ; + ObSysVars[586].id_ = SYS_VAR_OPTIMIZER_TRACE_OFFSET ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_OPTIMIZER_TRACE_OFFSET)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_OPTIMIZER_TRACE_OFFSET] = 586 ; + ObSysVars[586].base_value_ = "-1" ; + ObSysVars[586].alias_ = "OB_SV_OPTIMIZER_TRACE_OFFSET" ; }(); [&] (){ - ObSysVars[587].default_value_ = "0" ; - ObSysVars[587].info_ = "For statements that invoke RAND(), the source passes two values to the 
replica, where they are used to seed the random number generator" ; - ObSysVars[587].name_ = "rand_seed1" ; + ObSysVars[587].default_value_ = "18446744073709551615" ; + ObSysVars[587].info_ = "The maximum amount of memory available to the parser" ; + ObSysVars[587].name_ = "parser_max_mem_size" ; ObSysVars[587].data_type_ = ObUInt64Type ; - ObSysVars[587].min_val_ = "0" ; - ObSysVars[587].max_val_ = "4294967295" ; - ObSysVars[587].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[587].id_ = SYS_VAR_RAND_SEED1 ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_RAND_SEED1)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_RAND_SEED1] = 587 ; - ObSysVars[587].base_value_ = "0" ; - ObSysVars[587].alias_ = "OB_SV_RAND_SEED1" ; + ObSysVars[587].min_val_ = "10000000" ; + ObSysVars[587].max_val_ = "18446744073709551615" ; + ObSysVars[587].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[587].id_ = SYS_VAR_PARSER_MAX_MEM_SIZE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_PARSER_MAX_MEM_SIZE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_PARSER_MAX_MEM_SIZE] = 587 ; + ObSysVars[587].base_value_ = "18446744073709551615" ; + ObSysVars[587].alias_ = "OB_SV_PARSER_MAX_MEM_SIZE" ; }(); [&] (){ ObSysVars[588].default_value_ = "0" ; ObSysVars[588].info_ = "For statements that invoke RAND(), the source passes two values to the replica, where they are used to seed the random number generator" ; - ObSysVars[588].name_ = "rand_seed2" ; + ObSysVars[588].name_ = "rand_seed1" ; ObSysVars[588].data_type_ = ObUInt64Type ; ObSysVars[588].min_val_ = "0" ; ObSysVars[588].max_val_ = "4294967295" ; ObSysVars[588].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[588].id_ = SYS_VAR_RAND_SEED2 ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_RAND_SEED2)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_RAND_SEED2] = 588 ; + ObSysVars[588].id_ = SYS_VAR_RAND_SEED1 ; + cur_max_var_id = 
MAX(cur_max_var_id, static_cast(SYS_VAR_RAND_SEED1)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_RAND_SEED1] = 588 ; ObSysVars[588].base_value_ = "0" ; - ObSysVars[588].alias_ = "OB_SV_RAND_SEED2" ; + ObSysVars[588].alias_ = "OB_SV_RAND_SEED1" ; }(); [&] (){ - ObSysVars[589].default_value_ = "4096" ; - ObSysVars[589].info_ = "The size in bytes of blocks that are allocated when doing range optimization" ; - ObSysVars[589].name_ = "range_alloc_block_size" ; + ObSysVars[589].default_value_ = "0" ; + ObSysVars[589].info_ = "For statements that invoke RAND(), the source passes two values to the replica, where they are used to seed the random number generator" ; + ObSysVars[589].name_ = "rand_seed2" ; ObSysVars[589].data_type_ = ObUInt64Type ; - ObSysVars[589].min_val_ = "4096" ; - ObSysVars[589].max_val_ = "18446744073709550592" ; - ObSysVars[589].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[589].id_ = SYS_VAR_RANGE_ALLOC_BLOCK_SIZE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_RANGE_ALLOC_BLOCK_SIZE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_RANGE_ALLOC_BLOCK_SIZE] = 589 ; - ObSysVars[589].base_value_ = "4096" ; - ObSysVars[589].alias_ = "OB_SV_RANGE_ALLOC_BLOCK_SIZE" ; + ObSysVars[589].min_val_ = "0" ; + ObSysVars[589].max_val_ = "4294967295" ; + ObSysVars[589].flags_ = ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[589].id_ = SYS_VAR_RAND_SEED2 ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_RAND_SEED2)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_RAND_SEED2] = 589 ; + ObSysVars[589].base_value_ = "0" ; + ObSysVars[589].alias_ = "OB_SV_RAND_SEED2" ; }(); [&] (){ - ObSysVars[590].default_value_ = "8388608" ; - ObSysVars[590].info_ = "The limit on memory consumption for the range optimizer" ; - ObSysVars[590].name_ = "range_optimizer_max_mem_size" ; + ObSysVars[590].default_value_ = "4096" ; + ObSysVars[590].info_ = "The size in bytes of blocks that are allocated when doing range 
optimization" ; + ObSysVars[590].name_ = "range_alloc_block_size" ; ObSysVars[590].data_type_ = ObUInt64Type ; - ObSysVars[590].min_val_ = "0" ; - ObSysVars[590].max_val_ = "18446744073709551615" ; + ObSysVars[590].min_val_ = "4096" ; + ObSysVars[590].max_val_ = "18446744073709550592" ; ObSysVars[590].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[590].id_ = SYS_VAR_RANGE_OPTIMIZER_MAX_MEM_SIZE ; + ObSysVars[590].id_ = SYS_VAR_RANGE_ALLOC_BLOCK_SIZE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_RANGE_ALLOC_BLOCK_SIZE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_RANGE_ALLOC_BLOCK_SIZE] = 590 ; + ObSysVars[590].base_value_ = "4096" ; + ObSysVars[590].alias_ = "OB_SV_RANGE_ALLOC_BLOCK_SIZE" ; + }(); + + [&] (){ + ObSysVars[591].default_value_ = "8388608" ; + ObSysVars[591].info_ = "The limit on memory consumption for the range optimizer" ; + ObSysVars[591].name_ = "range_optimizer_max_mem_size" ; + ObSysVars[591].data_type_ = ObUInt64Type ; + ObSysVars[591].min_val_ = "0" ; + ObSysVars[591].max_val_ = "18446744073709551615" ; + ObSysVars[591].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[591].id_ = SYS_VAR_RANGE_OPTIMIZER_MAX_MEM_SIZE ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_RANGE_OPTIMIZER_MAX_MEM_SIZE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_RANGE_OPTIMIZER_MAX_MEM_SIZE] = 590 ; - ObSysVars[590].base_value_ = "8388608" ; - ObSysVars[590].alias_ = "OB_SV_RANGE_OPTIMIZER_MAX_MEM_SIZE" ; + ObSysVarsIdToArrayIdx[SYS_VAR_RANGE_OPTIMIZER_MAX_MEM_SIZE] = 591 ; + ObSysVars[591].base_value_ = "8388608" ; + ObSysVars[591].alias_ = "OB_SV_RANGE_OPTIMIZER_MAX_MEM_SIZE" ; }(); [&] (){ - ObSysVars[591].default_value_ = "1" ; - ObSysVars[591].info_ = "Whether the Rewriter query rewrite plugin is enabled" ; - ObSysVars[591].name_ = "rewriter_enabled" ; - ObSysVars[591].data_type_ = ObIntType ; - ObSysVars[591].flags_ = 
ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[591].id_ = SYS_VAR_REWRITER_ENABLED ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_REWRITER_ENABLED)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_REWRITER_ENABLED] = 591 ; - ObSysVars[591].base_value_ = "1" ; - ObSysVars[591].alias_ = "OB_SV_REWRITER_ENABLED" ; - }(); - - [&] (){ - ObSysVars[592].default_value_ = "0" ; - ObSysVars[592].info_ = "For internal use in MySQL" ; - ObSysVars[592].name_ = "rewriter_verbose" ; + ObSysVars[592].default_value_ = "1" ; + ObSysVars[592].info_ = "Whether the Rewriter query rewrite plugin is enabled" ; + ObSysVars[592].name_ = "rewriter_enabled" ; ObSysVars[592].data_type_ = ObIntType ; ObSysVars[592].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[592].id_ = SYS_VAR_REWRITER_VERBOSE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_REWRITER_VERBOSE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_REWRITER_VERBOSE] = 592 ; - ObSysVars[592].base_value_ = "0" ; - ObSysVars[592].alias_ = "OB_SV_REWRITER_VERBOSE" ; + ObSysVars[592].id_ = SYS_VAR_REWRITER_ENABLED ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_REWRITER_ENABLED)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_REWRITER_ENABLED] = 592 ; + ObSysVars[592].base_value_ = "1" ; + ObSysVars[592].alias_ = "OB_SV_REWRITER_ENABLED" ; }(); [&] (){ - ObSysVars[593].default_value_ = "1" ; - ObSysVars[593].info_ = "If this variable is enabled, the server blocks connections by clients that attempt to use accounts that have passwords stored in the old (pre-4.1) format" ; - ObSysVars[593].name_ = "secure_auth" ; + ObSysVars[593].default_value_ = "0" ; + ObSysVars[593].info_ = "For internal use in MySQL" ; + ObSysVars[593].name_ = "rewriter_verbose" ; ObSysVars[593].data_type_ = ObIntType ; ObSysVars[593].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[593].id_ = SYS_VAR_SECURE_AUTH ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SECURE_AUTH)) ; - 
ObSysVarsIdToArrayIdx[SYS_VAR_SECURE_AUTH] = 593 ; - ObSysVars[593].base_value_ = "1" ; - ObSysVars[593].alias_ = "OB_SV_SECURE_AUTH" ; + ObSysVars[593].id_ = SYS_VAR_REWRITER_VERBOSE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_REWRITER_VERBOSE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_REWRITER_VERBOSE] = 593 ; + ObSysVars[593].base_value_ = "0" ; + ObSysVars[593].alias_ = "OB_SV_REWRITER_VERBOSE" ; }(); [&] (){ - ObSysVars[594].default_value_ = "0" ; - ObSysVars[594].info_ = "This variable controls whether the sha256_password built-in authentication plugin supports proxy users" ; - ObSysVars[594].name_ = "sha256_password_proxy_users" ; + ObSysVars[594].default_value_ = "1" ; + ObSysVars[594].info_ = "If this variable is enabled, the server blocks connections by clients that attempt to use accounts that have passwords stored in the old (pre-4.1) format" ; + ObSysVars[594].name_ = "secure_auth" ; ObSysVars[594].data_type_ = ObIntType ; ObSysVars[594].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[594].id_ = SYS_VAR_SHA256_PASSWORD_PROXY_USERS ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SHA256_PASSWORD_PROXY_USERS)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_SHA256_PASSWORD_PROXY_USERS] = 594 ; - ObSysVars[594].base_value_ = "0" ; - ObSysVars[594].alias_ = "OB_SV_SHA256_PASSWORD_PROXY_USERS" ; + ObSysVars[594].id_ = SYS_VAR_SECURE_AUTH ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SECURE_AUTH)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_SECURE_AUTH] = 594 ; + ObSysVars[594].base_value_ = "1" ; + ObSysVars[594].alias_ = "OB_SV_SECURE_AUTH" ; }(); [&] (){ ObSysVars[595].default_value_ = "0" ; - ObSysVars[595].info_ = "which affects whether MySQL 5.6 compatibility is enabled with respect to how system and status variable information is provided by the INFORMATION_SCHEMA and Performance Schema tables, and also by the SHOW VARIABLES and SHOW STATUS statements" ; - ObSysVars[595].name_ = "show_compatibility_56" ; + 
ObSysVars[595].info_ = "This variable controls whether the sha256_password built-in authentication plugin supports proxy users" ; + ObSysVars[595].name_ = "sha256_password_proxy_users" ; ObSysVars[595].data_type_ = ObIntType ; ObSysVars[595].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[595].id_ = SYS_VAR_SHOW_COMPATIBILITY_56 ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SHOW_COMPATIBILITY_56)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_SHOW_COMPATIBILITY_56] = 595 ; + ObSysVars[595].id_ = SYS_VAR_SHA256_PASSWORD_PROXY_USERS ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SHA256_PASSWORD_PROXY_USERS)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_SHA256_PASSWORD_PROXY_USERS] = 595 ; ObSysVars[595].base_value_ = "0" ; - ObSysVars[595].alias_ = "OB_SV_SHOW_COMPATIBILITY_56" ; + ObSysVars[595].alias_ = "OB_SV_SHA256_PASSWORD_PROXY_USERS" ; }(); [&] (){ ObSysVars[596].default_value_ = "0" ; - ObSysVars[596].info_ = "Enabling this variable causes SHOW CREATE TABLE to display ROW_FORMAT regardless of whether it is the default format" ; - ObSysVars[596].name_ = "show_create_table_verbosity" ; + ObSysVars[596].info_ = "which affects whether MySQL 5.6 compatibility is enabled with respect to how system and status variable information is provided by the INFORMATION_SCHEMA and Performance Schema tables, and also by the SHOW VARIABLES and SHOW STATUS statements" ; + ObSysVars[596].name_ = "show_compatibility_56" ; ObSysVars[596].data_type_ = ObIntType ; - ObSysVars[596].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[596].id_ = SYS_VAR_SHOW_CREATE_TABLE_VERBOSITY ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SHOW_CREATE_TABLE_VERBOSITY)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_SHOW_CREATE_TABLE_VERBOSITY] = 596 ; + ObSysVars[596].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[596].id_ = SYS_VAR_SHOW_COMPATIBILITY_56 ; + cur_max_var_id = 
MAX(cur_max_var_id, static_cast(SYS_VAR_SHOW_COMPATIBILITY_56)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_SHOW_COMPATIBILITY_56] = 596 ; ObSysVars[596].base_value_ = "0" ; - ObSysVars[596].alias_ = "OB_SV_SHOW_CREATE_TABLE_VERBOSITY" ; + ObSysVars[596].alias_ = "OB_SV_SHOW_COMPATIBILITY_56" ; }(); [&] (){ ObSysVars[597].default_value_ = "0" ; - ObSysVars[597].info_ = "Whether SHOW CREATE TABLE output includes comments" ; - ObSysVars[597].name_ = "show_old_temporals" ; + ObSysVars[597].info_ = "Enabling this variable causes SHOW CREATE TABLE to display ROW_FORMAT regardless of whether it is the default format" ; + ObSysVars[597].name_ = "show_create_table_verbosity" ; ObSysVars[597].data_type_ = ObIntType ; ObSysVars[597].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[597].id_ = SYS_VAR_SHOW_OLD_TEMPORALS ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SHOW_OLD_TEMPORALS)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_SHOW_OLD_TEMPORALS] = 597 ; + ObSysVars[597].id_ = SYS_VAR_SHOW_CREATE_TABLE_VERBOSITY ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SHOW_CREATE_TABLE_VERBOSITY)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_SHOW_CREATE_TABLE_VERBOSITY] = 597 ; ObSysVars[597].base_value_ = "0" ; - ObSysVars[597].alias_ = "OB_SV_SHOW_OLD_TEMPORALS" ; + ObSysVars[597].alias_ = "OB_SV_SHOW_CREATE_TABLE_VERBOSITY" ; }(); [&] (){ - ObSysVars[598].default_value_ = "1" ; - ObSysVars[598].info_ = "If set to OFF, MySQL aborts SELECT statements that are likely to take a very long time to execute" ; - ObSysVars[598].name_ = "sql_big_selects" ; + ObSysVars[598].default_value_ = "0" ; + ObSysVars[598].info_ = "Whether SHOW CREATE TABLE output includes comments" ; + ObSysVars[598].name_ = "show_old_temporals" ; ObSysVars[598].data_type_ = ObIntType ; ObSysVars[598].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[598].id_ = SYS_VAR_SQL_BIG_SELECTS ; - cur_max_var_id = 
MAX(cur_max_var_id, static_cast(SYS_VAR_SQL_BIG_SELECTS)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_SQL_BIG_SELECTS] = 598 ; - ObSysVars[598].base_value_ = "1" ; - ObSysVars[598].alias_ = "OB_SV_SQL_BIG_SELECTS" ; + ObSysVars[598].id_ = SYS_VAR_SHOW_OLD_TEMPORALS ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SHOW_OLD_TEMPORALS)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_SHOW_OLD_TEMPORALS] = 598 ; + ObSysVars[598].base_value_ = "0" ; + ObSysVars[598].alias_ = "OB_SV_SHOW_OLD_TEMPORALS" ; }(); [&] (){ ObSysVars[599].default_value_ = "1" ; - ObSysVars[599].info_ = "This variable controls whether updates to a view can be made when the view does not contain all columns of the primary key defined in the underlying table, if the update statement contains a LIMIT clause" ; - ObSysVars[599].name_ = "updatable_views_with_limit" ; + ObSysVars[599].info_ = "If set to OFF, MySQL aborts SELECT statements that are likely to take a very long time to execute" ; + ObSysVars[599].name_ = "sql_big_selects" ; ObSysVars[599].data_type_ = ObIntType ; - ObSysVars[599].enum_names_ = "[u'OFF', u'ON', u'NO', u'YES']" ; ObSysVars[599].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[599].id_ = SYS_VAR_UPDATABLE_VIEWS_WITH_LIMIT ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_UPDATABLE_VIEWS_WITH_LIMIT)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_UPDATABLE_VIEWS_WITH_LIMIT] = 599 ; + ObSysVars[599].id_ = SYS_VAR_SQL_BIG_SELECTS ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_SQL_BIG_SELECTS)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_SQL_BIG_SELECTS] = 599 ; ObSysVars[599].base_value_ = "1" ; - ObSysVars[599].alias_ = "OB_SV_UPDATABLE_VIEWS_WITH_LIMIT" ; + ObSysVars[599].alias_ = "OB_SV_SQL_BIG_SELECTS" ; }(); [&] (){ - ObSysVars[600].default_value_ = "" ; - ObSysVars[600].info_ = "The path name of the dictionary file that validate_password uses for checking passwords." 
; - ObSysVars[600].name_ = "validate_password_dictionary_file" ; - ObSysVars[600].data_type_ = ObVarcharType ; - ObSysVars[600].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[600].id_ = SYS_VAR_VALIDATE_PASSWORD_DICTIONARY_FILE ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_VALIDATE_PASSWORD_DICTIONARY_FILE)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_VALIDATE_PASSWORD_DICTIONARY_FILE] = 600 ; - ObSysVars[600].base_value_ = "" ; - ObSysVars[600].alias_ = "OB_SV_VALIDATE_PASSWORD_DICTIONARY_FILE" ; + ObSysVars[600].default_value_ = "1" ; + ObSysVars[600].info_ = "This variable controls whether updates to a view can be made when the view does not contain all columns of the primary key defined in the underlying table, if the update statement contains a LIMIT clause" ; + ObSysVars[600].name_ = "updatable_views_with_limit" ; + ObSysVars[600].data_type_ = ObIntType ; + ObSysVars[600].enum_names_ = "[u'OFF', u'ON', u'NO', u'YES']" ; + ObSysVars[600].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::SESSION_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[600].id_ = SYS_VAR_UPDATABLE_VIEWS_WITH_LIMIT ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_UPDATABLE_VIEWS_WITH_LIMIT)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_UPDATABLE_VIEWS_WITH_LIMIT] = 600 ; + ObSysVars[600].base_value_ = "1" ; + ObSysVars[600].alias_ = "OB_SV_UPDATABLE_VIEWS_WITH_LIMIT" ; }(); [&] (){ - ObSysVars[601].default_value_ = "100" ; - ObSysVars[601].info_ = "" ; - ObSysVars[601].name_ = "delayed_insert_limit" ; - ObSysVars[601].data_type_ = ObUInt64Type ; - ObSysVars[601].min_val_ = "1" ; - ObSysVars[601].max_val_ = "18446744073709551615" ; + ObSysVars[601].default_value_ = "" ; + ObSysVars[601].info_ = "The path name of the dictionary file that validate_password uses for checking passwords." 
; + ObSysVars[601].name_ = "validate_password_dictionary_file" ; + ObSysVars[601].data_type_ = ObVarcharType ; ObSysVars[601].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; - ObSysVars[601].id_ = SYS_VAR_DELAYED_INSERT_LIMIT ; + ObSysVars[601].id_ = SYS_VAR_VALIDATE_PASSWORD_DICTIONARY_FILE ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_VALIDATE_PASSWORD_DICTIONARY_FILE)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_VALIDATE_PASSWORD_DICTIONARY_FILE] = 601 ; + ObSysVars[601].base_value_ = "" ; + ObSysVars[601].alias_ = "OB_SV_VALIDATE_PASSWORD_DICTIONARY_FILE" ; + }(); + + [&] (){ + ObSysVars[602].default_value_ = "100" ; + ObSysVars[602].info_ = "" ; + ObSysVars[602].name_ = "delayed_insert_limit" ; + ObSysVars[602].data_type_ = ObUInt64Type ; + ObSysVars[602].min_val_ = "1" ; + ObSysVars[602].max_val_ = "18446744073709551615" ; + ObSysVars[602].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY ; + ObSysVars[602].id_ = SYS_VAR_DELAYED_INSERT_LIMIT ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_DELAYED_INSERT_LIMIT)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_DELAYED_INSERT_LIMIT] = 601 ; - ObSysVars[601].base_value_ = "100" ; - ObSysVars[601].alias_ = "OB_SV_DELAYED_INSERT_LIMIT" ; + ObSysVarsIdToArrayIdx[SYS_VAR_DELAYED_INSERT_LIMIT] = 602 ; + ObSysVars[602].base_value_ = "100" ; + ObSysVars[602].alias_ = "OB_SV_DELAYED_INSERT_LIMIT" ; }(); [&] (){ - ObSysVars[602].default_value_ = "" ; - ObSysVars[602].info_ = "NDB engine version in ndb-x.y.z format" ; - ObSysVars[602].name_ = "ndb_version" ; - ObSysVars[602].data_type_ = ObVarcharType ; - ObSysVars[602].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[602].id_ = SYS_VAR_NDB_VERSION ; - cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_VERSION)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_NDB_VERSION] = 602 ; - ObSysVars[602].base_value_ = "" ; - ObSysVars[602].alias_ = "OB_SV_NDB_VERSION" ; - }(); - - [&] (){ - 
ObSysVars[603].default_value_ = "1" ; - ObSysVars[603].info_ = "This variable is available if the server was compiled using OpenSSL. It controls whether the server autogenerates SSL key and certificate files in the data directory, if they do not already exist" ; - ObSysVars[603].name_ = "auto_generate_certs" ; - ObSysVars[603].data_type_ = ObIntType ; + ObSysVars[603].default_value_ = "" ; + ObSysVars[603].info_ = "NDB engine version in ndb-x.y.z format" ; + ObSysVars[603].name_ = "ndb_version" ; + ObSysVars[603].data_type_ = ObVarcharType ; ObSysVars[603].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; - ObSysVars[603].id_ = SYS_VAR_AUTO_GENERATE_CERTS ; + ObSysVars[603].id_ = SYS_VAR_NDB_VERSION ; + cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_NDB_VERSION)) ; + ObSysVarsIdToArrayIdx[SYS_VAR_NDB_VERSION] = 603 ; + ObSysVars[603].base_value_ = "" ; + ObSysVars[603].alias_ = "OB_SV_NDB_VERSION" ; + }(); + + [&] (){ + ObSysVars[604].default_value_ = "1" ; + ObSysVars[604].info_ = "This variable is available if the server was compiled using OpenSSL. 
It controls whether the server autogenerates SSL key and certificate files in the data directory, if they do not already exist" ; + ObSysVars[604].name_ = "auto_generate_certs" ; + ObSysVars[604].data_type_ = ObIntType ; + ObSysVars[604].flags_ = ObSysVarFlag::GLOBAL_SCOPE | ObSysVarFlag::MYSQL_ONLY | ObSysVarFlag::READONLY ; + ObSysVars[604].id_ = SYS_VAR_AUTO_GENERATE_CERTS ; cur_max_var_id = MAX(cur_max_var_id, static_cast(SYS_VAR_AUTO_GENERATE_CERTS)) ; - ObSysVarsIdToArrayIdx[SYS_VAR_AUTO_GENERATE_CERTS] = 603 ; - ObSysVars[603].base_value_ = "1" ; - ObSysVars[603].alias_ = "OB_SV_AUTO_GENERATE_CERTS" ; + ObSysVarsIdToArrayIdx[SYS_VAR_AUTO_GENERATE_CERTS] = 604 ; + ObSysVars[604].base_value_ = "1" ; + ObSysVars[604].alias_ = "OB_SV_AUTO_GENERATE_CERTS" ; }(); if (cur_max_var_id >= ObSysVarFactory::OB_MAX_SYS_VAR_ID) { @@ -8416,7 +8431,7 @@ static struct VarsInit{ } }vars_init; -static int64_t var_amount = 604; +static int64_t var_amount = 605; int64_t ObSysVariables::get_all_sys_var_count(){ return ObSysVarFactory::ALL_SYS_VARS_COUNT;} ObSysVarClassType ObSysVariables::get_sys_var_id(int64_t i){ return ObSysVars[i].id_;} diff --git a/src/share/system_variable/ob_system_variable_init.json b/src/share/system_variable/ob_system_variable_init.json index 96a3146b53..d2db756695 100644 --- a/src/share/system_variable/ob_system_variable_init.json +++ b/src/share/system_variable/ob_system_variable_init.json @@ -7275,16 +7275,17 @@ "ob_hnsw_ef_search": { "id": 10419, "name": "ob_hnsw_ef_search", - "default_value": "40", + "default_value": "64", "base_value": "40", "data_type": "uint", "info": "The number of neighbor nodes considered during any HNSW vector index search on the session", "flags": "SESSION", + "min_val": "1", + "max_val": "1000", "publish_version": "433", "info_cn": "", "background_cn": "", - "ref_url": "", - "placeholder": true + "ref_url": "" }, "ndb_allow_copying_alter_table": { "id": 10420, diff --git a/src/share/vector/expr_cmp_func.cpp 
b/src/share/vector/expr_cmp_func.cpp index 7af412a88a..5d4e6d026b 100644 --- a/src/share/vector/expr_cmp_func.cpp +++ b/src/share/vector/expr_cmp_func.cpp @@ -20,6 +20,7 @@ #include "share/vector/ob_uniform_format.h" #include "share/vector/ob_fixed_length_format.h" #include "sql/engine/ob_serializable_function.h" +#include "sql/engine/expr/ob_array_expr_utils.h" #define NULL_FIRST_IDX 0 #define NULL_LAST_IDX 1 @@ -109,6 +110,83 @@ RowCmpFunc VectorCmpExprFuncsHelper::get_row_cmp_func(const sql::ObDatumMeta &l_ } // ===================== expr cmp functions ===================== + +struct ObNestedVectorCmpFunc +{ + template + static int cmp(const LeftVector *l_vec, const RightVector *r_vec, + const int64_t idx, const ObExpr &expr, ObEvalCtx &ctx, int &cmp_ret) + { + int ret = OB_SUCCESS; + cmp_ret = 0; + ObString left = l_vec->get_string(idx); + ObString right = r_vec->get_string(idx); + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); + const uint16_t left_meta_id = expr.args_[0]->obj_meta_.get_subschema_id(); + const uint16_t right_meta_id = expr.args_[1]->obj_meta_.get_subschema_id(); + const uint16_t res_meta_id = expr.obj_meta_.get_subschema_id(); + ObIArrayType *left_obj = NULL; + ObIArrayType *right_obj = NULL; + ObIArrayType *res_obj = NULL; + ObString res_str; + if (l_vec->get_format() == VEC_UNIFORM || l_vec->get_format() == VEC_UNIFORM_CONST) { + ret = construct_param(tmp_allocator, ctx, left_meta_id, left, left_obj); + } else { + ret = construct_attr_param(tmp_allocator, ctx, *expr.args_[0], left_meta_id, idx, left_obj); + } + if (OB_FAIL(ret)) { + } else if (r_vec->get_format() == VEC_UNIFORM || r_vec->get_format() == VEC_UNIFORM_CONST) { + ret = construct_param(tmp_allocator, ctx, right_meta_id, right, right_obj); + } else { + ret = construct_attr_param(tmp_allocator, ctx, *expr.args_[1], right_meta_id, idx, right_obj); + } + if (OB_FAIL(ret)) { + } else if 
(OB_FAIL(left_obj->compare(*right_obj, cmp_ret))) { + SQL_ENG_LOG(WARN, "init nested obj failed", K(ret)); + } + return ret; + } + + static int construct_attr_param(ObIAllocator &alloc, ObEvalCtx &ctx, ObExpr ¶m_expr, + const uint16_t meta_id, int64_t row_idx, ObIArrayType *¶m_obj) + { + int ret = OB_SUCCESS; + if (OB_FAIL(ObArrayExprUtils::construct_array_obj(alloc, ctx, meta_id, param_obj))) { + LOG_WARN("construct array obj failed", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::assemble_array_attrs(ctx, param_expr, row_idx, param_obj))) { + LOG_WARN("assemble array attrs failed", K(ret)); + } + return ret; + } + + static int construct_param(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t meta_id, + ObString &str_data, ObIArrayType *¶m_obj) + { + return ObArrayExprUtils::get_array_obj(alloc, ctx, meta_id, str_data, param_obj); + } + + static int construct_res_obj(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t meta_id, ObIArrayType *&res_obj) + { + return ObArrayExprUtils::construct_array_obj(alloc, ctx, meta_id, res_obj, false); + } + + static int construct_params(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t left_meta_id, + const uint16_t right_meta_id, const uint16_t res_meta_id, ObString &left, ObString right, + ObIArrayType *&left_obj, ObIArrayType *&right_obj, ObIArrayType *&res_obj) + { + int ret = OB_SUCCESS; + if (OB_FAIL(ObArrayExprUtils::get_array_obj(alloc, ctx, left_meta_id, left, left_obj))) { + SQL_ENG_LOG(WARN, "get array failed", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::get_array_obj(alloc, ctx, right_meta_id, right, right_obj))) { + SQL_ENG_LOG(WARN, "get array failed", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::construct_array_obj(alloc, ctx, res_meta_id, res_obj, false))) { + SQL_ENG_LOG(WARN, "construct res array failed", K(ret)); + } + return ret; + } +}; + template static int eval_right_operand(const ObExpr &cmp_expr, const ObExpr &left, const ObExpr &right, ObEvalCtx &ctx, const ObBitVector &skip, const 
EvalBound &bound) @@ -183,6 +261,7 @@ template <> int get_cmp_ret (const int ret) { return ret; } do { \ LVec *l_vector = static_cast(left.get_vector(ctx)); \ RVec *r_vector = static_cast(right.get_vector(ctx)); \ + bool is_nested = left.is_nested_expr() || right.is_nested_expr(); \ ResVec *res_vec = static_cast(expr.get_vector(ctx)); \ ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); \ const char *l_payload = nullptr, *r_payload = nullptr; \ @@ -192,11 +271,15 @@ template <> int get_cmp_ret (const int ret) { return ret; } if (OB_LIKELY(bound.get_all_rows_active() && bound.is_full_size() \ && eval_flags.accumulate_bit_cnt(bound.batch_size()) == 0)) { \ for (int i = bound.start(); OB_SUCC(ret) && i < bound.end(); i++) { \ - l_vector->get_payload(i, l_payload, l_len); \ - r_vector->get_payload(i, r_payload, r_len); \ - ret = VecTCCmpCalc::cmp(left.obj_meta_, right.obj_meta_, \ - (const void *)l_payload, l_len, \ - (const void *)r_payload, r_len, cmp_ret); \ + if (is_nested) { \ + ret = ObNestedVectorCmpFunc::cmp(l_vector, r_vector, i, expr, ctx, cmp_ret); \ + } else { \ + l_vector->get_payload(i, l_payload, l_len); \ + r_vector->get_payload(i, r_payload, r_len); \ + ret = VecTCCmpCalc::cmp(left.obj_meta_, right.obj_meta_, \ + (const void *)l_payload, l_len, \ + (const void *)r_payload, r_len, cmp_ret); \ + } \ if (OB_FAIL(ret)) { \ } else { \ res_vec->set_int(i, get_cmp_ret(cmp_ret)); \ @@ -206,11 +289,15 @@ template <> int get_cmp_ret (const int ret) { return ret; } } else { \ for (int i = bound.start(); OB_SUCC(ret) && i < bound.end(); i++) { \ if (skip.at(i) || eval_flags.at(i)) { continue; } \ - l_vector->get_payload(i, l_payload, l_len); \ - r_vector->get_payload(i, r_payload, r_len); \ - ret = VecTCCmpCalc::cmp(left.obj_meta_, right.obj_meta_, \ - (const void *)l_payload, l_len, \ - (const void *)r_payload, r_len, cmp_ret); \ + if (is_nested) { \ + ret = ObNestedVectorCmpFunc::cmp(l_vector, r_vector, i, expr, ctx, cmp_ret); \ + } else { \ + 
l_vector->get_payload(i, l_payload, l_len); \ + r_vector->get_payload(i, r_payload, r_len); \ + ret = VecTCCmpCalc::cmp(left.obj_meta_, right.obj_meta_, \ + (const void *)l_payload, l_len, \ + (const void *)r_payload, r_len, cmp_ret); \ + } \ if (OB_FAIL(ret)) { \ } else { \ res_vec->set_int(i, get_cmp_ret(cmp_ret)); \ @@ -225,11 +312,15 @@ template <> int get_cmp_ret (const int ret) { return ret; } res_vec->set_null(i); \ eval_flags.set(i); \ } else { \ - l_vector->get_payload(i, l_payload, l_len); \ - r_vector->get_payload(i, r_payload, r_len); \ - ret = VecTCCmpCalc::cmp(left.obj_meta_, right.obj_meta_, \ - (const void *)l_payload, l_len, \ - (const void *)r_payload, r_len, cmp_ret); \ + if (is_nested) { \ + ret = ObNestedVectorCmpFunc::cmp(l_vector, r_vector, i, expr, ctx, cmp_ret); \ + } else { \ + l_vector->get_payload(i, l_payload, l_len); \ + r_vector->get_payload(i, r_payload, r_len); \ + ret = VecTCCmpCalc::cmp(left.obj_meta_, right.obj_meta_, \ + (const void *)l_payload, l_len, \ + (const void *)r_payload, r_len, cmp_ret); \ + } \ if (OB_FAIL(ret)) { \ } else { \ res_vec->set_int(i, get_cmp_ret(cmp_ret)); \ diff --git a/src/share/vector/ob_continuous_vector.cpp b/src/share/vector/ob_continuous_vector.cpp index 86d7a841d7..da8c5d623b 100644 --- a/src/share/vector/ob_continuous_vector.cpp +++ b/src/share/vector/ob_continuous_vector.cpp @@ -126,6 +126,7 @@ template class ObContinuousVector>; template class ObContinuousVector>; template class ObContinuousVector>; template class ObContinuousVector>; +template class ObContinuousVector>; template class ObContinuousVector>; } // end namespace common diff --git a/src/share/vector/ob_discrete_vector.cpp b/src/share/vector/ob_discrete_vector.cpp index e70a29c2fa..a0b6c38eb7 100644 --- a/src/share/vector/ob_discrete_vector.cpp +++ b/src/share/vector/ob_discrete_vector.cpp @@ -127,6 +127,7 @@ template class ObDiscreteVector>; template class ObDiscreteVector>; template class ObDiscreteVector>; template class 
ObDiscreteVector>; +template class ObDiscreteVector>; template class ObDiscreteVector>; } // end namespace common } // end namespace oceanbase diff --git a/src/share/vector/ob_uniform_vector.cpp b/src/share/vector/ob_uniform_vector.cpp index 6da3b04b8a..281db1d953 100644 --- a/src/share/vector/ob_uniform_vector.cpp +++ b/src/share/vector/ob_uniform_vector.cpp @@ -152,6 +152,7 @@ template class ObUniformVector>; template class ObUniformVector>; template class ObUniformVector>; template class ObUniformVector>; +template class ObUniformVector>; template class ObUniformVector>; template class ObUniformVector>; @@ -186,6 +187,7 @@ template class ObUniformVector>; template class ObUniformVector>; template class ObUniformVector>; template class ObUniformVector>; +template class ObUniformVector>; template class ObUniformVector>; } // end namespace common } // end namespace oceanbase diff --git a/src/share/vector/ob_vector_define.h b/src/share/vector/ob_vector_define.h index 428756fa29..c9ec709135 100644 --- a/src/share/vector/ob_vector_define.h +++ b/src/share/vector/ob_vector_define.h @@ -113,6 +113,7 @@ DEFINE_CONTINUOUS_VECTOR(LobContVec, VEC_TC_LOB); DEFINE_CONTINUOUS_VECTOR(JsonContVec, VEC_TC_JSON); DEFINE_CONTINUOUS_VECTOR(GeoContVec, VEC_TC_GEO); DEFINE_CONTINUOUS_VECTOR(UdtContVec, VEC_TC_UDT); +DEFINE_CONTINUOUS_VECTOR(CollContVec, VEC_TC_COLLECTION); DEFINE_CONTINUOUS_VECTOR(RoaringBitmapContVec, VEC_TC_ROARINGBITMAP); #undef DEFINE_CONTINUOUS_VECTOR @@ -128,6 +129,7 @@ constexpr bool is_continuous_vec(const VecValueTypeClass tc) || tc == VEC_TC_JSON || tc == VEC_TC_GEO || tc == VEC_TC_UDT + || tc == VEC_TC_COLLECTION || tc == VEC_TC_ROARINGBITMAP); } @@ -143,6 +145,7 @@ DEFINE_DISCRETE_VECTOR(LobDiscVec, VEC_TC_LOB); DEFINE_DISCRETE_VECTOR(JsonDiscVec, VEC_TC_JSON); DEFINE_DISCRETE_VECTOR(GeoDiscVec, VEC_TC_GEO); DEFINE_DISCRETE_VECTOR(UdtDiscVec, VEC_TC_UDT); +DEFINE_DISCRETE_VECTOR(CollDiscVec, VEC_TC_COLLECTION); DEFINE_DISCRETE_VECTOR(RoaringBitmapDiscVec, 
VEC_TC_ROARINGBITMAP); #undef DEFINE_DISCRETE_VECTOR @@ -158,6 +161,7 @@ constexpr bool is_discrete_vec(const VecValueTypeClass vec_tc) || vec_tc == VEC_TC_JSON || vec_tc == VEC_TC_GEO || vec_tc == VEC_TC_UDT + || vec_tc == VEC_TC_COLLECTION || vec_tc == VEC_TC_ROARINGBITMAP); } @@ -195,6 +199,7 @@ DEFINE_UNIFORM_VECTOR(LobUniVec, VEC_TC_LOB); DEFINE_UNIFORM_VECTOR(JsonUniVec, VEC_TC_JSON); DEFINE_UNIFORM_VECTOR(GeoUniVec, VEC_TC_GEO); DEFINE_UNIFORM_VECTOR(UdtUniVec, VEC_TC_UDT); +DEFINE_UNIFORM_VECTOR(CollUniVec, VEC_TC_COLLECTION); DEFINE_UNIFORM_VECTOR(RoaringBitmapUniVec, VEC_TC_ROARINGBITMAP); #undef DEFINE_UNIFORM_VECTOR @@ -237,6 +242,7 @@ DEFINE_UNIFORM_CONST_VECTOR(LobUniCVec, VEC_TC_LOB); DEFINE_UNIFORM_CONST_VECTOR(JsonUniCVec, VEC_TC_JSON); DEFINE_UNIFORM_CONST_VECTOR(GeoUniCVec, VEC_TC_GEO); DEFINE_UNIFORM_CONST_VECTOR(UdtUniCVec, VEC_TC_UDT); +DEFINE_UNIFORM_CONST_VECTOR(CollUniCVec, VEC_TC_COLLECTION); DEFINE_UNIFORM_CONST_VECTOR(RoaringBitmapUniCVec, VEC_TC_ROARINGBITMAP); #undef DEFINE_UNIFORM_CONST_VECTOR diff --git a/src/share/vector/vector_basic_op.h b/src/share/vector/vector_basic_op.h index c1d804d638..d117615ad3 100644 --- a/src/share/vector/vector_basic_op.h +++ b/src/share/vector/vector_basic_op.h @@ -316,6 +316,34 @@ struct VecTCHashCalc } }; +template +struct VecTCHashCalc +{ + inline static int hash(HASH_ARG_LIST) + { + int ret = OB_SUCCESS; + ObString bin_str; + res = 0; + common::ObArenaAllocator allocator(ObModIds::OB_LOB_READER, OB_MALLOC_NORMAL_BLOCK_SIZE, + MTL_ID()); + ObTextStringIter str_iter(ObJsonType, CS_TYPE_BINARY, + ObString(len, reinterpret_cast(data)), + meta.has_lob_header()); + if (OB_FAIL(str_iter.init(0, NULL, &allocator))) { + COMMON_LOG(WARN, "Lob: str iter init failed", K(ret)); + } else if (OB_FAIL(str_iter.get_full_data(bin_str))) { + COMMON_LOG(WARN, "Lob: str iter get full data failed", K(ret)); + } else { + res = seed; + if (bin_str.length() > 0) { + res = ObCharset::hash(CS_TYPE_BINARY, bin_str.ptr(), 
bin_str.length(), seed, false, + HashMethod::is_varchar_hash ? HashMethod::hash : NULL); + } + } + return ret; + } +}; + template struct VecTCHashCalc { @@ -755,6 +783,18 @@ struct VecTCCmpCalc } }; +template<> +struct VecTCCmpCalc +{ + static const constexpr bool defined_ = true; + inline static int cmp(CMP_ARG_LIST) + { + int ret = OB_SUCCESS; + // not used + return ret; + } +}; + template<> struct VecTCCmpCalc { @@ -795,7 +835,6 @@ struct VecTCCmpCalc } }; - // null type comparison struct VecDummyCmpCalc diff --git a/src/share/vector_index/ob_plugin_vector_index_adaptor.cpp b/src/share/vector_index/ob_plugin_vector_index_adaptor.cpp new file mode 100644 index 0000000000..032c5b4e92 --- /dev/null +++ b/src/share/vector_index/ob_plugin_vector_index_adaptor.cpp @@ -0,0 +1,2097 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX SHARE + +#include "ob_plugin_vector_index_adaptor.h" +#include "ob_plugin_vector_index_serialize.h" +#include "lib/utility/ob_unify_serialize.h" +#include "storage/tx_storage/ob_access_service.h" +#include "share/schema/ob_table_dml_param.h" +#include "storage/access/ob_table_scan_iterator.h" +#include "sql/engine/expr/ob_expr_lob_utils.h" +#include "share/vector_index/ob_vector_index_util.h" +#include "sql/das/ob_das_dml_vec_iter.h" +#include "lib/vector/ob_vector_util.h" +#include "lib/random/ob_random.h" + +namespace oceanbase +{ +namespace share +{ + +ObVectorIndexInfo::ObVectorIndexInfo() + : ls_id_(share::ObLSID::INVALID_LS_ID), + rowkey_vid_table_id_(common::OB_INVALID_ID), + vid_rowkey_table_id_(common::OB_INVALID_ID), + inc_index_table_id_(common::OB_INVALID_ID), + vbitmap_table_id_(common::OB_INVALID_ID), + snapshot_index_table_id_(common::OB_INVALID_ID), + data_table_id_(common::OB_INVALID_ID), + rowkey_vid_tablet_id_(common::ObTabletID::INVALID_TABLET_ID), + vid_rowkey_tablet_id_(common::ObTabletID::INVALID_TABLET_ID), + inc_index_tablet_id_(common::ObTabletID::INVALID_TABLET_ID), + vbitmap_tablet_id_(common::ObTabletID::INVALID_TABLET_ID), + snapshot_index_tablet_id_(common::ObTabletID::INVALID_TABLET_ID), + data_tablet_id_(common::ObTabletID::INVALID_TABLET_ID), + statistics_(), + sync_info_() +{ + MEMSET(statistics_, '\0', sizeof(statistics_)); + MEMSET(sync_info_, '\0', sizeof(sync_info_)); +} + +void ObVectorIndexInfo::reset() +{ + ls_id_ = share::ObLSID::INVALID_LS_ID; + rowkey_vid_table_id_ = common::OB_INVALID_ID; + vid_rowkey_table_id_ = common::OB_INVALID_ID; + inc_index_table_id_ = common::OB_INVALID_ID; + vbitmap_table_id_ = common::OB_INVALID_ID; + snapshot_index_table_id_ = common::OB_INVALID_ID; + data_table_id_ = common::OB_INVALID_ID; + rowkey_vid_tablet_id_ = common::ObTabletID::INVALID_TABLET_ID; + vid_rowkey_tablet_id_ = common::ObTabletID::INVALID_TABLET_ID; + inc_index_tablet_id_ = 
common::ObTabletID::INVALID_TABLET_ID; + vbitmap_tablet_id_ = common::ObTabletID::INVALID_TABLET_ID; + snapshot_index_tablet_id_ = common::ObTabletID::INVALID_TABLET_ID; + data_tablet_id_ = common::ObTabletID::INVALID_TABLET_ID; + MEMSET(statistics_, '\0', sizeof(statistics_)); + MEMSET(sync_info_, '\0', sizeof(sync_info_)); +} + +OB_DEF_SERIALIZE_SIZE(ObVectorIndexHNSWParam) +{ + int64_t len = 0; + int ret = OB_SUCCESS; + LST_DO_CODE(OB_UNIS_ADD_LEN, + type_, + lib_, + dist_algorithm_, + dim_, + m_, + ef_construction_, + ef_search_); + return len; +} + +OB_DEF_SERIALIZE(ObVectorIndexHNSWParam) +{ + int ret = OB_SUCCESS; + LST_DO_CODE(OB_UNIS_ENCODE, + type_, + lib_, + dist_algorithm_, + dim_, + m_, + ef_construction_, + ef_search_); + return ret; +} + +OB_DEF_DESERIALIZE(ObVectorIndexAlgorithmHeader) +{ + int ret = OB_SUCCESS; + OB_UNIS_DECODE(type_); + return ret; +} + +OB_DEF_DESERIALIZE(ObVectorIndexHNSWParam) +{ + int ret = OB_SUCCESS; + LST_DO_CODE(OB_UNIS_DECODE, + type_, + lib_, + dist_algorithm_, + dim_, + m_, + ef_construction_, + ef_search_); + return ret; +} + +ObVectorQueryAdaptorResultContext::~ObVectorQueryAdaptorResultContext() { + status_ = PVQ_START; + flag_ = PVQP_MAX; + if (OB_NOT_NULL(bitmaps_)) { + if (OB_NOT_NULL(bitmaps_->insert_bitmap_)) { + roaring::api::roaring64_bitmap_free(bitmaps_->insert_bitmap_); + } + if (OB_NOT_NULL(bitmaps_->delete_bitmap_)) { + roaring::api::roaring64_bitmap_free(bitmaps_->delete_bitmap_); + } + } +}; + +int ObVectorQueryAdaptorResultContext::init_bitmaps() +{ + INIT_SUCC(ret); + if (OB_ISNULL(tmp_allocator_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("ctx allocator invalid.", K(ret)); + } else if (OB_ISNULL(bitmaps_ = static_cast + (tmp_allocator_->alloc(sizeof(ObVectorIndexRoaringBitMap))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create vbitmap msg", K(ret)); + } else if (OB_ISNULL(bitmaps_->insert_bitmap_ = roaring::api::roaring64_bitmap_create())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + 
LOG_WARN("failed to create insert bitmap", K(ret)); + } else if (OB_ISNULL(bitmaps_->delete_bitmap_ = roaring::api::roaring64_bitmap_create())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create delete bitmap", K(ret)); + } + + return ret; +} + +int ObVectorQueryAdaptorResultContext::is_bitmaps_valid() +{ + INIT_SUCC(ret); + if (OB_ISNULL(bitmaps_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctx bitmaps invalid.", K(ret)); + } else if (OB_ISNULL(bitmaps_->insert_bitmap_) || OB_ISNULL(bitmaps_->delete_bitmap_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctx bitmap insert or delete invalid.", K(ret)); + } + + return ret; +} + +// int ObVectorQueryAdaptorResultContext::set_vector(int64_t index, ObString &str) +int ObVectorQueryAdaptorResultContext::set_vector(int64_t index, const char *ptr, common::ObString::obstr_size_t size) +{ + INIT_SUCC(ret); + char *copy_str = nullptr; + if (OB_ISNULL(tmp_allocator_)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("get invalid allocator.", K(ret)); + } else if (index >= get_count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid index.", K(ret), K(index), K(get_count())); + } else if (size / sizeof(float) != get_dim()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid vector str.", K(ret), K(ptr), K(get_dim())); + } else if (OB_ISNULL(copy_str = static_cast(tmp_allocator_->alloc(sizeof(char*) * size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocator.", K(ret)); + } else { + memcpy(copy_str, ptr, size); + vec_data_.vectors_[index].reset(); + vec_data_.vectors_[index].set_string(ObVarcharType, copy_str, size); + } + + return ret; +} + +static void free_memdata_resource(ObVectorIndexRecordType type, + ObVectorIndexMemData *&memdata, + ObIAllocator *allocator) +{ + LOG_INFO("free memdata", K(type), KP(memdata), K(allocator), K(lbt())); // remove later + if (OB_NOT_NULL(memdata->bitmap_)) { + if (OB_NOT_NULL(memdata->bitmap_->insert_bitmap_)) { + 
roaring::api::roaring64_bitmap_free(memdata->bitmap_->insert_bitmap_); + memdata->bitmap_->insert_bitmap_ = nullptr; + } + if (OB_NOT_NULL(memdata->bitmap_->delete_bitmap_)) { + roaring::api::roaring64_bitmap_free(memdata->bitmap_->delete_bitmap_); + memdata->bitmap_->delete_bitmap_ = nullptr; + } + if (OB_NOT_NULL(memdata->bitmap_)) { + allocator->free(memdata->bitmap_); + memdata->bitmap_ = nullptr; + } + } + if (OB_NOT_NULL(memdata->index_)) { + obvectorutil::delete_index(memdata->index_); + LOG_INFO("delete vector index", K(type), KP(memdata->index_), K(lbt())); // remove later + memdata->index_ = nullptr; + } +} +/* Drops one reference on memdata through dec_ref_and_check_release(). When that call returns true the bitmaps and index are released with free_memdata_resource, mem_ctx_ is destroyed and freed, memdata is freed and the caller pointer is set to nullptr. A null memdata is a no-op and a null allocator returns OB_INVALID_ARGUMENT. */ +static int try_free_memdata_resource(ObVectorIndexRecordType type, + ObVectorIndexMemData *&memdata, + ObIAllocator *allocator) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(memdata)) { + // do nothing + } else if (OB_ISNULL(allocator)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("allocator is null", K(ret), K(type), KPC(memdata), K(allocator)); + } else if (memdata->dec_ref_and_check_release()) { + free_memdata_resource(type, memdata, allocator); + if (OB_NOT_NULL(memdata->mem_ctx_)) { + memdata->mem_ctx_->~ObVsagMemContext(); + allocator->free(memdata->mem_ctx_); + memdata->mem_ctx_ = nullptr; + } + allocator->free(memdata); + memdata = nullptr; + } else { + // do nothing + } + return ret; +} +/* Stores the allocator and the parent memory context. Every tablet id starts as INVALID_TABLET_ID, every table id as OB_INVALID_ID, the counters as 0 and all memdata pointers as nullptr. */ +ObPluginVectorIndexAdaptor::ObPluginVectorIndexAdaptor(common::ObIAllocator *allocator, lib::MemoryContext &entity) + : create_type_(CreateTypeMax), type_(VIAT_MAX), + algo_data_(nullptr), incr_data_(nullptr), snap_data_(nullptr), vbitmap_data_(nullptr), + snapshot_tablet_id_(ObTabletID(ObTabletID::INVALID_TABLET_ID)), + inc_tablet_id_(ObTabletID(ObTabletID::INVALID_TABLET_ID)), + vbitmap_tablet_id_(ObTabletID(ObTabletID::INVALID_TABLET_ID)), + data_tablet_id_(ObTabletID(ObTabletID::INVALID_TABLET_ID)), + rowkey_vid_tablet_id_(ObTabletID(ObTabletID::INVALID_TABLET_ID)), + vid_rowkey_tablet_id_(ObTabletID(ObTabletID::INVALID_TABLET_ID)), + 
inc_table_id_(OB_INVALID_ID), vbitmap_table_id_(OB_INVALID_ID), + snapshot_table_id_(OB_INVALID_ID), data_table_id_(OB_INVALID_ID), + rowkey_vid_table_id_(OB_INVALID_ID), vid_rowkey_table_id_(OB_INVALID_ID), + ref_cnt_(0), idle_cnt_(0), mem_check_cnt_(0), all_vsag_use_mem_(nullptr), allocator_(allocator), + parent_mem_ctx_(entity), index_identity_(), follower_sync_statistics_() +{ +} +/* Releases the incr, vbitmap and snap memdata in that order through try_free_memdata_resource. The OB_SUCC(ret) guards mean a failed release skips the ones after it. */ +ObPluginVectorIndexAdaptor::~ObPluginVectorIndexAdaptor() +{ + int ret = OB_SUCCESS; + LOG_INFO("destruct adaptor and free resources", K(is_complete()), K(this), KPC(this), K(lbt())); // remove later + // inc + if (OB_NOT_NULL(incr_data_) + && (OB_FAIL(try_free_memdata_resource(VIRT_INC, incr_data_, allocator_)))) { + LOG_WARN("failed to free incr memdata", K(ret), KPC(this)); + } + + if (OB_SUCC(ret) + && OB_NOT_NULL(vbitmap_data_) + && OB_FAIL(try_free_memdata_resource(VIRT_BITMAP, vbitmap_data_, allocator_))) { + LOG_WARN("failed to free vbitmap memdata", K(ret), KPC(this)); + } + + if (OB_SUCC(ret) + && OB_NOT_NULL(snap_data_) + && OB_FAIL(try_free_memdata_resource(VIRT_SNAP, snap_data_, allocator_))) { + LOG_WARN("failed to free snap memdata", K(ret), KPC(this)); + } + + // use another memdata struct for the following? 
+  if (OB_NOT_NULL(allocator_)) {
+    if(!index_identity_.empty()) {
+      allocator_->free(index_identity_.ptr());
+      index_identity_.reset();
+    }
+    if (OB_NOT_NULL(algo_data_)) {
+      allocator_->free(algo_data_);
+      algo_data_ = nullptr;
+    }
+  }
+}
+
+// Allocates and constructs one ObVectorIndexMemData together with its
+// ObVsagMemContext, sets its scn to the minimum and takes the first reference.
+//   table_info : in/out slot; left untouched when it already points at a
+//                memdata, and reset to nullptr when construction fails.
+// Returns OB_INVALID_ARGUMENT without an allocator and
+// OB_ALLOCATE_MEMORY_FAILED when either allocation fails.
+int ObPluginVectorIndexAdaptor::init_mem(ObVectorIndexMemData *&table_info)
+{
+  INIT_SUCC(ret);
+  void *table_buff = nullptr;
+  if (OB_NOT_NULL(table_info)) {
+    // do nothing
+  } else if (OB_ISNULL(get_allocator())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("adaptor allocator invalid.", K(ret));
+  } else if (OB_ISNULL(table_buff = static_cast<ObVectorIndexMemData *>(
+                                      get_allocator()->alloc(sizeof(ObVectorIndexMemData))))) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("failed to create vbitmap msg", K(ret));
+  } else if (OB_FALSE_IT(table_info = new(table_buff) ObVectorIndexMemData())) {
+  } else if (OB_ISNULL(table_info->mem_ctx_ = OB_NEWx(ObVsagMemContext, get_allocator(), all_vsag_use_mem_))) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("failed to create mem_ctx msg", K(ret));
+  } else {
+    table_info->scn_.set_min();
+    table_info->inc_ref();
+  }
+
+  if (OB_FAIL(ret)) {
+    if (OB_NOT_NULL(table_buff)) {
+      get_allocator()->free(table_buff);
+      table_buff = nullptr;
+      // table_info aliased table_buff once the placement new ran. Clear it so
+      // the owning member does not keep a dangling pointer that the adaptor
+      // destructor would later hand to try_free_memdata_resource.
+      table_info = nullptr;
+    }
+  }
+  return ret;
+}
+
+// Reports whether the memdata of the given record type exists and is marked
+// inited. Any other record type yields false.
+bool ObPluginVectorIndexAdaptor::is_mem_data_init_atomic(ObVectorIndexRecordType type)
+{
+  bool bret = false;
+  if (type == VIRT_INC) {
+    bret = (OB_NOT_NULL(incr_data_) && incr_data_->is_inited());
+  } else if (type == VIRT_BITMAP) {
+    bret = (OB_NOT_NULL(vbitmap_data_) && vbitmap_data_->is_inited());
+  } else if (type == VIRT_SNAP) {
+    bret = (OB_NOT_NULL(snap_data_) && snap_data_->is_inited());
+  }
+  return bret;
+}
+
+// Allocates the incr, vbitmap and snap memdata and then records the parent
+// memory context and the shared vsag memory counter.
+int ObPluginVectorIndexAdaptor::init(lib::MemoryContext &parent_mem_ctx, uint64_t *all_vsag_use_mem)
+{
+  INIT_SUCC(ret);
+
+  if (OB_ISNULL(get_allocator())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("adaptor allocator invalid.", K(ret));
+  } else if (OB_FAIL(init_mem(incr_data_))) {
+    LOG_WARN("failed to init incr mem data.", K(ret));
+  
} else if (OB_FAIL(init_mem(vbitmap_data_))) { + LOG_WARN("failed to init vbitmap mem data.", K(ret)); + } else if (OB_FAIL(init_mem(snap_data_))) { + LOG_WARN("failed to init snap mem data.", K(ret)); + } else { + parent_mem_ctx_ = parent_mem_ctx; + all_vsag_use_mem_ = all_vsag_use_mem; + } + // on a mid-way failure, mem resources that were already initialized should be released by the caller + return ret; +} +/* Same steps as the two-argument init, plus set_param(init_str, dim) before the parent memory context and the vsag memory counter are recorded. */ +int ObPluginVectorIndexAdaptor::init(ObString init_str, int64_t dim, lib::MemoryContext &parent_mem_ctx, uint64_t *all_vsag_use_mem) +{ + INIT_SUCC(ret); + ObVectorIndexAlgorithmType type; + + if (OB_ISNULL(get_allocator())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("adaptor allocator invalid.", K(ret)); + } else if (OB_FAIL(init_mem(incr_data_))) { + LOG_WARN("failed to init incr mem data.", K(ret)); + } else if (OB_FAIL(init_mem(vbitmap_data_))) { + LOG_WARN("failed to init vbitmap mem data.", K(ret)); + } else if (OB_FAIL(init_mem(snap_data_))) { + LOG_WARN("failed to init snap mem data.", K(ret)); + } else if (OB_FAIL(set_param(init_str, dim))){ + LOG_WARN("failed to set param.", K(ret)); + } else { + parent_mem_ctx_ = parent_mem_ctx; + all_vsag_use_mem_ = all_vsag_use_mem; + } + // on a mid-way failure, mem resources that were already initialized should be released by the caller + return ret; +} +/* Allocates an ObVectorIndexHNSWParam, fills it from init_str with ObVectorIndexUtil::parser_params_from_string, then records type_, algo_data_ and the dimension. Does nothing when algo_data_ is already set. */ +int ObPluginVectorIndexAdaptor::set_param(ObString init_str, int64_t dim) +{ + INIT_SUCC(ret); + ObVectorIndexHNSWParam *hnsw_param = nullptr; + if (OB_NOT_NULL(algo_data_)) { + // do nothing + } else if (OB_ISNULL(get_allocator())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("adaptor allocator invalid.", K(ret)); + } else if (OB_ISNULL(hnsw_param = static_cast + (get_allocator()->alloc(sizeof(ObVectorIndexHNSWParam))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate mem.", K(ret)); + } else if (OB_FAIL(ObVectorIndexUtil::parser_params_from_string(init_str, *hnsw_param))) { + LOG_WARN("failed to parse params.", K(ret)); + } else { + type_ = hnsw_param->type_; + algo_data_ = hnsw_param; 
+ hnsw_param->dim_ = dim; + LOG_INFO("init vector index adapter with param", KPC(hnsw_param)); // change log to debug level later + } + + if (OB_FAIL(ret)) { + if (OB_NOT_NULL(hnsw_param)) { + get_allocator()->free(hnsw_param); + hnsw_param = nullptr; + } + } + return ret; +} + +int ObPluginVectorIndexAdaptor::param_deserialize(char *ptr, int32_t length, + ObIAllocator *allocator, + ObVectorIndexAlgorithmType &type, + void *¶m) +{ + INIT_SUCC(ret); + int64_t pos = 0; + ObVectorIndexAlgorithmHeader header; + if (OB_FAIL(header.deserialize(ptr, length, pos))) { + LOG_WARN("failed to deserialize header.", K(ret), K(ptr), K(pos)); + } else { + type = header.type_; + switch(type) { + case VIAT_HNSW: { + int64_t param_pos = 0; + ObVectorIndexHNSWParam *hnsw_param = nullptr; + if (OB_ISNULL(hnsw_param = static_cast + (allocator->alloc(sizeof(ObVectorIndexHNSWParam))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate mem.", K(ret)); + } else if (OB_FAIL(hnsw_param->deserialize(ptr, length, param_pos))) { + LOG_WARN("failed to deserialize hnsw param.", K(ret), K(param_pos)); + } else { + param = hnsw_param; + } + + if (OB_FAIL(ret)) { + if (OB_NOT_NULL(hnsw_param)) { + allocator->free(hnsw_param); + hnsw_param = nullptr; + } + } + + break; + } + default: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("get index algorithm type not support.", K(ret), K(type)); + break; + } + } + } + + return ret; +} + +int ObPluginVectorIndexAdaptor::get_dim(int64_t &dim) +{ + INIT_SUCC(ret); + // TODO [WORKDOC] work document NO.1 + if (type_ == VIAT_HNSW) { + ObVectorIndexHNSWParam *param = nullptr; + if (OB_ISNULL(param = static_cast(algo_data_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get param.", K(ret)); + } else { + dim = param->dim_; + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("get index algorithm type not support.", K(ret), K(type_)); + } + return ret; +} + +int ObPluginVectorIndexAdaptor::get_hnsw_param(ObVectorIndexHNSWParam *¶m) +{ + INIT_SUCC(ret); 
+ if (type_ == VIAT_HNSW) { + if (OB_ISNULL(param = static_cast(algo_data_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get param.", K(ret)); + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("get index algorithm type not support.", K(ret), K(type_)); + } + return ret; +} +/* Copies the table ids and tablet ids into info, then formats info.statistics_ (is_complete, param, ref_cnt, idle_cnt, index, per-memdata scn) and info.sync_info_ (follower sync counters) as key=value; pairs with databuff_printf. The first formatting failure is returned. */ +int ObPluginVectorIndexAdaptor::fill_vector_index_info(ObVectorIndexInfo &info) +{ + int ret = OB_SUCCESS; + // table_id + info.rowkey_vid_table_id_ = rowkey_vid_table_id_; + info.vid_rowkey_table_id_ = vid_rowkey_table_id_; + info.inc_index_table_id_ = inc_table_id_; + info.vbitmap_table_id_ = vbitmap_table_id_; + info.snapshot_index_table_id_ = snapshot_table_id_; + info.data_table_id_ = data_table_id_; + // tablet_id + info.rowkey_vid_tablet_id_ = rowkey_vid_tablet_id_.id(); + info.vid_rowkey_tablet_id_ = vid_rowkey_tablet_id_.id(); + info.inc_index_tablet_id_ = inc_tablet_id_.id(); + info.vbitmap_tablet_id_ = vbitmap_tablet_id_.id(); + info.snapshot_index_tablet_id_ = snapshot_tablet_id_.id(); + info.data_tablet_id_ = data_tablet_id_.id(); + ObVectorIndexHNSWParam *param; + int64_t pos = 0; + + if (OB_FAIL(databuff_printf(info.statistics_, + sizeof(info.statistics_), pos, + "is_complete=%d;", is_complete()))) { + LOG_WARN("failed to fill statistics", K(ret), K(this)); + } else if (type_ == VIAT_MAX) { + // partial adapter without index configuration + } else if (OB_FAIL(get_hnsw_param(param))) { + LOG_WARN("get hnsw param failed.", K(ret)); + } else if (OB_FAIL(databuff_printf(info.statistics_, + sizeof(info.statistics_), pos, + "param=%s;", to_cstring(*param)))) { + LOG_WARN("failed to fill statistics", K(ret), K(this)); + } + if (FAILEDx(databuff_printf(info.statistics_, + sizeof(info.statistics_), pos, + "ref_cnt=%ld;", ATOMIC_LOAD(&ref_cnt_) - 1))) { // delete the virtual table ref + LOG_WARN("failed to fill statistics", K(ret), K(this)); + } else if (OB_FAIL(databuff_printf(info.statistics_, + sizeof(info.statistics_), pos, "idle_cnt=%ld;", idle_cnt_))) { + LOG_WARN("failed 
to fill statistics", K(ret), K(this)); + } else if (!index_identity_.empty() && OB_FAIL(databuff_printf( + info.statistics_, sizeof(info.statistics_), pos, + "index=%s;", to_cstring(index_identity_)))) { + LOG_WARN("failed to fill statistic", K(ret), K(this)); + } else if (nullptr != incr_data_ && OB_FAIL(databuff_printf( + info.statistics_, sizeof(info.statistics_), pos, + "incr_data.scn=%lu;", incr_data_->scn_.get_val_for_inner_table_field()))) { + LOG_WARN("failed to fill statistic", K(ret), K(this)); + } else if (nullptr != vbitmap_data_ && OB_FAIL(databuff_printf( + info.statistics_, sizeof(info.statistics_), pos, + "vbitmap_data.scn=%lu;", vbitmap_data_->scn_.get_val_for_inner_table_field()))) { + LOG_WARN("failed to fill statistic", K(ret), K(this)); + } else if (nullptr != snap_data_ && OB_FAIL(databuff_printf( + info.statistics_, sizeof(info.statistics_), pos, + "snap_data.scn=%lu;", snap_data_->scn_.get_val_for_inner_table_field()))) { + LOG_WARN("failed to fill statistic", K(ret), K(this)); + } + pos = 0; /* restart the write offset for the second buffer, info.sync_info_ */ + if (OB_FAIL(ret)) { + } else if (OB_FAIL(databuff_printf(info.sync_info_, sizeof(info.sync_info_), pos, + "incr_cnt=%lu;", follower_sync_statistics_.incr_count_))) { + LOG_WARN("failed to fill sync_info", K(ret), K(this)); + } else if (OB_FAIL(databuff_printf(info.sync_info_, sizeof(info.sync_info_), pos, + "vbitmap_cnt=%lu;", follower_sync_statistics_.vbitmap_count_))) { + LOG_WARN("failed to fill sync_info", K(ret), K(this)); + } else if (OB_FAIL(databuff_printf(info.sync_info_, sizeof(info.sync_info_), pos, + "snap_cnt=%lu;", follower_sync_statistics_.snap_count_))) { + LOG_WARN("failed to fill sync_info", K(ret), K(this)); + } else if (OB_FAIL(databuff_printf(info.sync_info_, sizeof(info.sync_info_), pos, + "sync_total_cnt=%lu;", follower_sync_statistics_.sync_count_))) { + LOG_WARN("failed to fill sync_info", K(ret), K(this)); + } else if (OB_FAIL(databuff_printf(info.sync_info_, sizeof(info.sync_info_), pos, + "sync_fail_cnt=%lu;", 
follower_sync_statistics_.sync_fail_))) { + LOG_WARN("failed to fill sync_info", K(ret), K(this)); + } + return ret; +} +/* Builds the in-memory structures of the requested record type while holding that memdata write lock, unless the memdata is already marked inited. VIRT_INC creates a vsag index plus an insert bitmap, VIRT_BITMAP creates insert and delete bitmaps, VIRT_SNAP creates a vsag index. On failure the partially built resources are released with free_memdata_resource. */ +int ObPluginVectorIndexAdaptor::init_mem_data(ObVectorIndexRecordType type) +{ + INIT_SUCC(ret); + ObVectorIndexHNSWParam *param = nullptr; + const char* const DATATYPE_FLOAT32 = "float32"; + if (OB_FAIL(get_hnsw_param(param))) { + LOG_WARN("get hnsw param failed.", K(ret)); + } else if (OB_FAIL(check_vsag_mem_used())) { + LOG_WARN("check vsag mem used failed.", K(ret)); + } else if (type == VIRT_INC) { + TCWLockGuard lock_guard(incr_data_->mem_data_rwlock_); + if (!incr_data_->is_inited()) { + if (OB_FAIL(incr_data_->mem_ctx_->init(parent_mem_ctx_, all_vsag_use_mem_))) { + LOG_WARN("failed to init incr data mem ctx.", K(ret)); + } else if (OB_FAIL(obvectorutil::create_index(incr_data_->index_, + obvectorlib::HNSW_TYPE, + DATATYPE_FLOAT32, + VEC_INDEX_ALGTH[param->dist_algorithm_], + param->dim_, + param->m_, + param->ef_construction_, + param->ef_search_, + incr_data_->mem_ctx_))) { + ret = OB_ERR_VSAG_RETURN_ERROR; + LOG_WARN("failed to create vsag index.", K(ret)); + } else if (OB_ISNULL(incr_data_->bitmap_ = static_cast + (get_allocator()->alloc(sizeof(ObVectorIndexRoaringBitMap))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create delta_bitmap", K(ret)); + } else if (OB_ISNULL(incr_data_->bitmap_->insert_bitmap_ = roaring::api::roaring64_bitmap_create())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create delta insert bitmap", K(ret)); + } else { + incr_data_->bitmap_->delete_bitmap_ = nullptr; + incr_data_->set_inited(); // should release memory if fail + LOG_INFO("create incr index success.", K(ret), KP(incr_data_->index_), K(lbt())); // remove later + } + if (OB_FAIL(ret)) { + free_memdata_resource(type, incr_data_, get_allocator()); + if (incr_data_->mem_ctx_->is_inited()) { + incr_data_->mem_ctx_->~ObVsagMemContext(); + } + } + } + } else if (type == VIRT_BITMAP) { + TCWLockGuard 
lock_guard(vbitmap_data_->mem_data_rwlock_); + if (!vbitmap_data_->is_inited()) { + if (OB_ISNULL(vbitmap_data_->bitmap_ = static_cast + (get_allocator()->alloc(sizeof(ObVectorIndexRoaringBitMap))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create snapshot_bitmap", K(ret)); + } else if (OB_ISNULL(vbitmap_data_->bitmap_->insert_bitmap_ = roaring::api::roaring64_bitmap_create())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create snapshot insert bitmap", K(ret)); + } else if (OB_ISNULL(vbitmap_data_->bitmap_->delete_bitmap_ = roaring::api::roaring64_bitmap_create())) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to create snapshot delete bitmap", K(ret)); + } else { + vbitmap_data_->set_inited(); + } + if (OB_FAIL(ret)) { + free_memdata_resource(type, vbitmap_data_, get_allocator()); + } + } + } else if (type == VIRT_SNAP) { + TCWLockGuard lock_guard(snap_data_->mem_data_rwlock_); + if (!snap_data_->is_inited()) { + if (OB_FAIL(snap_data_->mem_ctx_->init(parent_mem_ctx_, all_vsag_use_mem_))) { + LOG_WARN("failed to init incr data mem ctx.", K(ret)); + } else if (OB_FAIL(obvectorutil::create_index(snap_data_->index_, + obvectorlib::HNSW_TYPE, + DATATYPE_FLOAT32, + VEC_INDEX_ALGTH[param->dist_algorithm_], + param->dim_, + param->m_, + param->ef_construction_, + param->ef_search_, + snap_data_->mem_ctx_))) { + ret = OB_ERR_VSAG_RETURN_ERROR; + LOG_WARN("failed to create vsag index.", K(ret), K(snap_data_->index_), KPC(param)); + } else { + snap_data_->set_inited(); + LOG_INFO("create snap data success.", K(ret), KP(snap_data_->index_), K(lbt())); // remove later + } + if (OB_FAIL(ret)) { + free_memdata_resource(type, snap_data_, get_allocator()); + if (snap_data_->mem_ctx_->is_inited()) { + snap_data_->mem_ctx_->~ObVsagMemContext(); + } + } + } + } + return ret; +} +/* Returns incr_data_->index_, or nullptr when incr_data_ is null. */ +void *ObPluginVectorIndexAdaptor::get_incr_index() +{ + void *res = nullptr; + if (OB_NOT_NULL(incr_data_)) { + res = incr_data_->index_; + } + return res; +} + 
+void *ObPluginVectorIndexAdaptor::get_snap_index() +{ + void *res = nullptr; + if (OB_NOT_NULL(snap_data_)) { + res = snap_data_->index_; + } + return res; +} + +const roaring::api::roaring64_bitmap_t *ObPluginVectorIndexAdaptor::get_incr_ibitmap() +{ + roaring::api::roaring64_bitmap_t *res = nullptr; + if (OB_NOT_NULL(incr_data_) && OB_NOT_NULL(incr_data_->bitmap_)) { + res = incr_data_->bitmap_->insert_bitmap_; + } + return res; +} + +const roaring::api::roaring64_bitmap_t *ObPluginVectorIndexAdaptor::get_vbitmap_ibitmap() +{ + roaring::api::roaring64_bitmap_t *res = nullptr; + if (OB_NOT_NULL(vbitmap_data_) && OB_NOT_NULL(vbitmap_data_->bitmap_)) { + res = vbitmap_data_->bitmap_->insert_bitmap_; + } + return res; +} + +const roaring::api::roaring64_bitmap_t *ObPluginVectorIndexAdaptor::get_vbitmap_dbitmap() +{ + roaring::api::roaring64_bitmap_t *res = nullptr; + if (OB_NOT_NULL(vbitmap_data_) && OB_NOT_NULL(vbitmap_data_->bitmap_)) { + res = vbitmap_data_->bitmap_->delete_bitmap_; + } + return res; +} + +int ObPluginVectorIndexAdaptor::check_tablet_valid(ObVectorIndexRecordType type) +{ + INIT_SUCC(ret); + if (type == VIRT_INC) { + if (!is_inc_tablet_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expect insert inc index but table id invalid.", K(ret)); + } + } else if (type == VIRT_SNAP) { + if (!is_snap_tablet_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expect insert snap index but table id invalid.", K(ret)); + } + } else if (type == VIRT_BITMAP) { + if (!is_vbitmap_tablet_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expect insert snap index but table id invalid.", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get vector index record type invalid.", K(ret)); + } + + return ret; +} + +int ObPluginVectorIndexAdaptor::insert_rows(blocksstable::ObDatumRow *rows, + const int64_t vid_idx, + const int64_t type_idx, + const int64_t vector_idx, + int64_t row_count) +{ + INIT_SUCC(ret); + int64_t dim = 0; + ObArenaAllocator 
tmp_allocator("VectorAdaptor", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + if (OB_ISNULL(rows)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get rows null.", K(ret)); + } else if (OB_FAIL(check_tablet_valid(VIRT_INC))) { + LOG_WARN("check tablet id invalid.", K(ret)); + } else if (OB_FAIL(try_init_mem_data(VIRT_INC))) { + LOG_WARN("failed to init incr index.", K(ret)); + } else if (row_count <= 0) { + // do nothing + } else if (OB_FAIL(get_dim(dim))) { + LOG_WARN("get dim failed.", K(ret)); + } else { + uint64_t incr_vid_count = 0; + uint64_t del_vid_count = 0; + uint64_t null_vid_count = 0; + int64_t *incr_vids = nullptr; + uint64_t *del_vids = nullptr; + uint64_t *null_vids = nullptr; + float *vectors = nullptr; + + if (OB_ISNULL(incr_vids = static_cast(tmp_allocator.alloc(sizeof(int64_t) * row_count)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc incr vids.", K(ret)); + } else if (OB_ISNULL(del_vids = static_cast(tmp_allocator.alloc(sizeof(uint64_t) * row_count)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc del vids.", K(ret)); + } else if (OB_ISNULL(null_vids = static_cast(tmp_allocator.alloc(sizeof(uint64_t) * row_count)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc del vids.", K(ret)); + } else if (OB_ISNULL(vectors = static_cast(tmp_allocator.alloc(sizeof(float) * row_count * dim)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc vectors.", K(ret)); + } + + for (int i = 0; OB_SUCC(ret) && i < row_count; i++) { + int64_t vid = 0; + ObString op_str; + ObString vector_str; + float *vector = nullptr; + ObDatum &vid_datum = rows[i].storage_datums_[vid_idx]; + ObDatum &op_datum = rows[i].storage_datums_[type_idx]; + ObDatum &vector_datum = rows[i].storage_datums_[vector_idx]; + + if (FALSE_IT(vid = vid_datum.get_int())) { + // LOG_WARN("failed to get uint64.", K(ret), K(vid_datum), K(i)); + } else if (FALSE_IT(op_str = op_datum.get_string())) { + // LOG_WARN("failed to get op str.", 
K(ret), K(op_datum), K(i)); + } else if (op_str.ptr()[0] == sql::ObVecIndexDMLIterator::VEC_DELTA_DELETE[0]) { + // D type, only record vid + del_vids[del_vid_count++] = vid; + } else if (vector_datum.len_ == 0) { + null_vids[null_vid_count++] = vid; + } else if (vector_datum.len_ / sizeof(float) != dim) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get vector objct unexpect.", K(ret), K(vector_datum)); + } else if (FALSE_IT(vector_str = vector_datum.get_string())) { + LOG_WARN("failed to get vector string.", K(ret)); + } else if (OB_ISNULL(vector = reinterpret_cast(vector_str.ptr()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to cast vectors.", K(ret)); + } else { + for (int j = 0; j < dim; j++) { + vectors[incr_vid_count * dim + j] = vector[j]; + } + incr_vids[incr_vid_count++] = vid; + } + } + + OX(check_vsag_mem_used()); + if (OB_SUCC(ret)) { + TCWLockGuard lock_guard(incr_data_->mem_data_rwlock_); + if (OB_FAIL(obvectorutil::add_index(incr_data_->index_, + vectors, + incr_vids, + dim, + incr_vid_count))) { + ret = OB_ERR_VSAG_RETURN_ERROR; + LOG_WARN("failed to add index.", K(ret), K(dim), K(row_count)); + } + } + if (OB_SUCC(ret)) { + TCWLockGuard lock_guard(incr_data_->bitmap_rwlock_); + roaring::api::roaring64_bitmap_add_many(incr_data_->bitmap_->insert_bitmap_, incr_vid_count, reinterpret_cast(incr_vids)); + roaring::api::roaring64_bitmap_remove_many(incr_data_->bitmap_->insert_bitmap_, del_vid_count, del_vids); + roaring::api::roaring64_bitmap_add_many(incr_data_->bitmap_->insert_bitmap_, null_vid_count, null_vids); + } + } + + return ret; +} + +/************************************************************************** +* Note: +* The number of vids must be equal to num; +* There cannot be null pointers in vectors; +* The number of floats in vectors must be equal to num * dim; + +* If you want to verify the above content in the add_snap_index interface, you need to traverse vectors and vids. 
+ In the scenario where a large amount of data is written, there will be a lot of unnecessary performance consumption, + so the caller needs to ensure this. +**************************************************************************/ +int ObPluginVectorIndexAdaptor::add_snap_index(float *vectors, int64_t *vids, int num) +{ + INIT_SUCC(ret); + int64_t dim = 0; + if (OB_FAIL(check_tablet_valid(VIRT_SNAP))) { + LOG_WARN("check tablet id invalid.", K(ret)); + } else if (OB_FAIL(get_dim(dim))) { + LOG_WARN("get dim failed.", K(ret)); + } else if (OB_FAIL(try_init_mem_data(VIRT_SNAP))) { + LOG_WARN("init snap index failed.", K(ret)); + } else if (num == 0 || OB_ISNULL(vectors)) { + // do nothing + } else if (OB_ISNULL(vids)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid data.", K(ret)); + } else { + TCWLockGuard lock_guard(snap_data_->mem_data_rwlock_); + if (OB_FAIL(obvectorutil::add_index(snap_data_->index_, vectors, vids, dim, num))) { + ret = OB_ERR_VSAG_RETURN_ERROR; + LOG_WARN("failed to build index.", K(ret), K(dim), K(num)); + } + } + + return ret; +} +/* Drains row_iter (expected to yield 3-column rows: vid in column 0, one-character op in column 1) into the ctx bitmaps created by ctx->init_bitmaps(). An insert op adds the vid to the insert bitmap; a delete op removes it from the insert bitmap and adds it to the delete bitmap. Sets ctx->status_ to PVQ_LACK_SCN on success. query_scn is not read here. */ +// Query Processor first +int ObPluginVectorIndexAdaptor::check_delta_buffer_table_readnext_status(ObVectorQueryAdaptorResultContext *ctx, + common::ObNewRowIterator *row_iter, + SCN query_scn) +{ + INIT_SUCC(ret); + SCN min_delta_scn; + + // TODO first decide whether the query has to wait (PVQ_WAIT) + if (OB_ISNULL(ctx) || OB_ISNULL(row_iter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get ctx or row_iter invalid.", K(ret), KP(row_iter)); + } else if (OB_FAIL(ctx->init_bitmaps())) { + LOG_WARN("failed to init ctx bitmaps.", K(ret)); + } else { + ObTableScanIterator *table_scan_iter = static_cast(row_iter); + while (OB_SUCC(ret)) { + blocksstable::ObDatumRow *datum_row = nullptr; + int64_t vid = 0; + ObString op; + if (OB_FAIL(table_scan_iter->get_next_row(datum_row))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next row failed.", K(ret)); + } + } else if (OB_ISNULL(datum_row) || !datum_row->is_valid()) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("get row invalid.", K(ret)); + } else if (datum_row->get_column_count() != 3) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get row column cnt invalid.", K(ret), K(datum_row->get_column_count())); + } else if (OB_FALSE_IT(vid = datum_row->storage_datums_[0].get_int())) { + LOG_WARN("failed to get vid.", K(ret)); + } else if (OB_FALSE_IT(op = datum_row->storage_datums_[1].get_string())) { + LOG_WARN("failed to get op.", K(ret)); + } else if (op.length() != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid op length.", K(ret), K(op)); + } else { + if (op.ptr()[0] == sql::ObVecIndexDMLIterator::VEC_DELTA_INSERT[0]) { + roaring::api::roaring64_bitmap_add(ctx->bitmaps_->insert_bitmap_, vid); + + } else if (op.ptr()[0] == sql::ObVecIndexDMLIterator::VEC_DELTA_DELETE[0]) { + roaring::api::roaring64_bitmap_remove(ctx->bitmaps_->insert_bitmap_, vid); + roaring::api::roaring64_bitmap_add(ctx->bitmaps_->delete_bitmap_, vid); + + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid op.", K(ret), K(op)); + } + } + } + + if (ret == OB_ITER_END) { + ret = OB_SUCCESS; + } + +#ifdef ENABLE_DEBUG_LOG + output_bitmap(ctx->bitmaps_->insert_bitmap_); + output_bitmap(ctx->bitmaps_->delete_bitmap_); +#endif + + if (OB_SUCC(ret)) { + ctx->status_ = PVQ_LACK_SCN; + } + } + + return ret; +} +/* Adds count vectors and their vids to the incr insert bitmap and the incr vsag index, under the incr memdata write lock, but only when check_if_complete_delta still reports that the ctx insert bitmap is not covered. Requires the incr memdata to be inited; ctx is dereferenced without a null check. */ +int ObPluginVectorIndexAdaptor::write_into_delta_mem(ObVectorQueryAdaptorResultContext *ctx, int count, float *vectors, uint64_t *vids) +{ + INIT_SUCC(ret); + if (count == 0) { + // do nothing + } else if (!is_mem_data_init_atomic(VIRT_INC)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("write into delta mem but incr memdata uninit.", K(ret)); + } else if (OB_FAIL(check_vsag_mem_used())) { + LOG_WARN("check vsag mem used failed.", K(ret)); + } else { + TCWLockGuard lock_guard(incr_data_->mem_data_rwlock_); + if (check_if_complete_delta(ctx->bitmaps_->insert_bitmap_)) { + if (OB_SUCC(ret)) { + TCWLockGuard lock_guard(incr_data_->bitmap_rwlock_); + 
roaring64_bitmap_add_many(incr_data_->bitmap_->insert_bitmap_, count, vids); + } + + if (OB_SUCC(ret) && OB_FAIL(obvectorutil::add_index(incr_data_->index_, + vectors, + reinterpret_cast(vids), + ctx->get_dim(), + count))) { + ret = OB_ERR_VSAG_RETURN_ERROR; + LOG_WARN("failed to add index.", K(ret), K(ctx->get_dim()), K(count)); + } + } + } + + return ret; +} +/* Packs the non-empty vectors stored in ctx->vec_data_ and their vids into contiguous scratch arrays, writes them with write_into_delta_mem and then sets ctx->status_ to PVQ_LACK_SCN. A vector whose byte length differs from dim * sizeof(float) is rejected with OB_ERR_UNEXPECTED. */ +int ObPluginVectorIndexAdaptor::complete_delta_buffer_table_data(ObVectorQueryAdaptorResultContext *ctx) +{ + INIT_SUCC(ret); + float *vectors = nullptr; + uint64_t *vids = nullptr; + int count = 0; + ObArenaAllocator tmp_allocator("VectorAdaptor", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + if (OB_ISNULL(ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid ctx.", K(ret)); + } else if (ctx->get_count() == 0) { + // do nothing + } else if (OB_FAIL(try_init_mem_data(VIRT_INC))) { + LOG_WARN("failed to init incr mem data.", K(ret)); + } else if (OB_ISNULL(vectors = static_cast(tmp_allocator.alloc(sizeof(float) * ctx->get_dim() * ctx->get_count())))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc new mem.", K(ret)); + } else if (OB_ISNULL(vids = static_cast(tmp_allocator.alloc(sizeof(uint64_t) * ctx->get_count())))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc new mem.", K(ret)); + } else { + int64_t dim = ctx->get_dim(); + for (int i = 0; OB_SUCC(ret) && i < ctx->get_count(); i++) { + float *vector = nullptr; + if (ctx->vec_data_.vectors_[i].is_null() || ctx->vec_data_.vectors_[i].get_string().empty()) { + // do nothing + } else if (ctx->vec_data_.vectors_[i].get_string().length() != dim * sizeof(float)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid string.", K(ret), K(ctx->vec_data_.vectors_[i]), K(dim)); + } else if (OB_ISNULL(vector = reinterpret_cast(ctx->vec_data_.vectors_[i].get_string().ptr()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get float vector.", K(ret)); + } else { + vids[count] = ctx->get_vids()[i].get_int(); + for (int j = 0; 
OB_SUCC(ret) && j < dim; j++) { + vectors[count * dim + j] = vector[j]; + } + count++; + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(write_into_delta_mem(ctx, count, vectors, vids))) { + LOG_WARN("failed to write into delta mem.", K(ret), KP(ctx)); + } else { + ctx->status_ = PVQ_LACK_SCN; + } + + return ret; +} + +// Query Processor second +int ObPluginVectorIndexAdaptor::check_index_id_table_readnext_status(ObVectorQueryAdaptorResultContext *ctx, + common::ObNewRowIterator *row_iter, + SCN query_scn) +{ + INIT_SUCC(ret); + blocksstable::ObDatumRow *datum_row = nullptr; + int64_t read_num = 0; + SCN read_scn = SCN::min_scn(); + ObArray i_vids; + ObTableScanIterator *table_scan_iter = static_cast(row_iter); + + // TODO 优先判断是否需要等待 PVQ_WAIT + if (OB_ISNULL(ctx) || OB_ISNULL(table_scan_iter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get ctx or row_iter invalid.", K(ret), KP(row_iter)); + } else if (snap_data_->is_inited() && snap_data_->rb_flag_) { + ctx->status_ = PVQ_LACK_SCN; + ctx->flag_ = PVQP_SECOND; + } else { + ctx->status_ = PVQ_OK; + ctx->flag_ = PVQP_FIRST; + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(table_scan_iter->get_next_row(datum_row))) { + if (ret == OB_ITER_END) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get new row.", K(ret)); + } + } else { + if (!datum_row->is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid new row.", K(ret)); + } else if (OB_FALSE_IT(read_num = datum_row->storage_datums_[0].get_int())) { + LOG_WARN("failed to get read scn.", K(ret)); + } else if (OB_FAIL(read_scn.convert_for_gts(read_num))) { + LOG_WARN("failed to convert from ts.", K(ret), K(read_num)); + } + + if (OB_FAIL(ret)) { + } else if (check_if_complete_index(read_scn) && + OB_FAIL(complete_index_mem_data(read_scn, row_iter, datum_row, i_vids))) { + LOG_WARN("failed to check comple index mem data.", K(ret), K(read_scn), K(vbitmap_data_->scn_)); + } else if (OB_ISNULL(ctx->bitmaps_) || OB_ISNULL(ctx->bitmaps_->insert_bitmap_)) { 
+ ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get ctx bit map.", K(ret)); + } else if (check_if_complete_delta(ctx->bitmaps_->insert_bitmap_)) { + if (OB_FAIL(prepare_delta_mem_data(ctx->bitmaps_->insert_bitmap_, + i_vids, + ctx))) { + LOG_WARN("failed to complete.", K(ret)); + } else { + ctx->status_ = PVQ_COM_DATA; + } + } + } + + return ret; +} + +// Query Processor third +int ObPluginVectorIndexAdaptor::check_snapshot_table_wait_status(ObVectorQueryAdaptorResultContext *ctx) +{ + INIT_SUCC(ret); + // TODO 判断是否需要等待 PVQ_WAIT + ctx->status_ = PVQ_OK; + + return ret; +} + +int ObPluginVectorIndexAdaptor::write_into_index_mem(int64_t dim, SCN read_scn, + ObArray &i_vids, + ObArray &d_vids) +{ + INIT_SUCC(ret); + TCWLockGuard lock_guard(vbitmap_data_->mem_data_rwlock_); + if (read_scn > vbitmap_data_->scn_) { + TCWLockGuard wr_vbit_bitmap_lock_guard(vbitmap_data_->bitmap_rwlock_); + roaring::api::roaring64_bitmap_t *ibitmap = vbitmap_data_->bitmap_->insert_bitmap_; + roaring::api::roaring64_bitmap_t *dbitmap = vbitmap_data_->bitmap_->delete_bitmap_; + roaring::api::roaring64_bitmap_add_many(ibitmap, i_vids.count(), i_vids.get_data()); + roaring::api::roaring64_bitmap_add_many(dbitmap, d_vids.count(), d_vids.get_data()); + roaring::api::roaring64_bitmap_remove_many(ibitmap, d_vids.count(), d_vids.get_data()); + +#ifndef NDEBUG + output_bitmap(ibitmap); + output_bitmap(dbitmap); +#endif + + vbitmap_data_->scn_ = read_scn; + } + + return ret; +} + +bool ObPluginVectorIndexAdaptor::check_if_complete_index(SCN read_scn) +{ + bool res = false; + SCN bitmap_scn = vbitmap_data_->scn_; + if (read_scn > bitmap_scn) { + res = true; + LOG_DEBUG("need complete index mem data.", K(read_scn), K(bitmap_scn)); + } + + return res; +} + +int ObPluginVectorIndexAdaptor::add_datum_row_into_array(blocksstable::ObDatumRow *datum_row, + ObArray &i_vids, + ObArray &d_vids) +{ + INIT_SUCC(ret); + int64_t vid = 0; + ObString op; + if (OB_ISNULL(datum_row)|| !datum_row->is_valid()) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("get row invalid.", K(ret)); + } else if (OB_FALSE_IT(vid = datum_row->storage_datums_[1].get_int())) { + LOG_WARN("failed to get vid.", K(ret)); + } else if (OB_FALSE_IT(op = datum_row->storage_datums_[2].get_string())) { + LOG_WARN("failed to get op.", K(ret)); + } else if (op.length() != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid op length.", K(ret), K(op)); + } else if (op.ptr()[0] == sql::ObVecIndexDMLIterator::VEC_DELTA_INSERT[0]) { + if (OB_FAIL(i_vids.push_back(vid))) { + LOG_WARN("failed to push back into vids.", K(ret), K(vid)); + } + } else if (op.ptr()[0] == sql::ObVecIndexDMLIterator::VEC_DELTA_DELETE[0]) { + if (OB_FAIL(d_vids.push_back(vid))) { + LOG_WARN("failed to push back into vids.", K(ret), K(vid)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid op.", K(ret), K(op)); + } + return ret; +} + +int ObPluginVectorIndexAdaptor::complete_index_mem_data(SCN read_scn, + common::ObNewRowIterator *row_iter, + blocksstable::ObDatumRow *last_row, + ObArray &i_vids) +{ + INIT_SUCC(ret); + int64_t dim = 0; + ObArray d_vids; + if (OB_ISNULL(row_iter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get ctx or row_iter null.", K(ret), KP(row_iter)); + } else if (OB_FAIL(try_init_mem_data(VIRT_BITMAP))) { + LOG_WARN("failed to init valid bitmap", K(ret), K(VIRT_BITMAP)); + } else if (OB_FAIL(add_datum_row_into_array(last_row, i_vids, d_vids))) { + LOG_WARN("failed to add vid into array.", K(ret), KP(last_row)); + } else { + ObTableScanIterator *table_scan_iter = static_cast(row_iter); + while (OB_SUCC(ret)) { + blocksstable::ObDatumRow *datum_row = nullptr; + int64_t vid = 0; + ObString op; + if (OB_FAIL(table_scan_iter->get_next_row(datum_row))) { + if (OB_ITER_END != ret) { + LOG_WARN("get next row failed.", K(ret)); + } + } else if (OB_FAIL(add_datum_row_into_array(datum_row, i_vids, d_vids))) { + LOG_WARN("failed to add vid into array.", K(ret), KP(datum_row)); + } + } + + if (ret == OB_ITER_END) { + ret = 
OB_SUCCESS; + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(get_dim(dim))) { + LOG_WARN("failed to get dim.", K(ret)); + } else if (OB_FAIL(write_into_index_mem(dim, read_scn, i_vids, d_vids))) { + LOG_WARN("failed to write into index mem.", K(ret), K(read_scn)); + } + } + + return ret; +} + +bool ObPluginVectorIndexAdaptor::check_if_complete_delta(roaring::api::roaring64_bitmap_t *gene_bitmap) +{ + bool res = false; + if (is_mem_data_init_atomic(VIRT_INC)) { + roaring::api::roaring64_bitmap_t *delta_bitmap = ATOMIC_LOAD(&(incr_data_->bitmap_->insert_bitmap_)); + if (!roaring64_bitmap_is_subset(gene_bitmap, delta_bitmap)) { + res = true; +#if 0 // ToDo: debug, remove later + if (gene_bitmap != nullptr) { + uint32_t gene_bitmap_count = roaring64_bitmap_get_cardinality(gene_bitmap); + uint64_t *gene_bitmap_out = (uint64_t *)malloc(gene_bitmap_count * sizeof(uint64_t)); + roaring64_bitmap_to_uint64_array(gene_bitmap, gene_bitmap_out); + for (int i = 0; i < gene_bitmap_count; i++) { + LOG_INFO("gene_bitmap_out", K(i), K(gene_bitmap_out[i])); + } + } else { + LOG_INFO("gene_bitmap is emptry"); + } + if (delta_bitmap != nullptr) { + uint32_t delta_bitmap_count = roaring64_bitmap_get_cardinality(delta_bitmap); + uint64_t *delta_bitmap_out = (uint64_t *)malloc(delta_bitmap_count * sizeof(uint64_t)); + roaring64_bitmap_to_uint64_array(delta_bitmap, delta_bitmap_out); + for (int i = 0; i < delta_bitmap_count; i++) { + LOG_INFO("delta_bitmap_out", K(i), K(delta_bitmap_out[i])); + } + } else { + LOG_INFO("delta_bitmap is emptry"); + } +#endif + } + } else if (roaring64_bitmap_get_cardinality(gene_bitmap) > 0) { + res = true; + } + return res; +} + +int ObPluginVectorIndexAdaptor::prepare_delta_mem_data(roaring::api::roaring64_bitmap_t *gene_bitmap, + ObArray &i_vids, + ObVectorQueryAdaptorResultContext *ctx) +{ + INIT_SUCC(ret); + roaring::api::roaring64_bitmap_t *delta_bitmap = nullptr; + if (OB_FAIL(try_init_mem_data(VIRT_INC))) { + LOG_WARN("failed to init mem data incr.", 
K(ret)); + } else if (OB_ISNULL(gene_bitmap) || OB_ISNULL(delta_bitmap = incr_data_->bitmap_->insert_bitmap_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid bitmap.", K(ret), KP(gene_bitmap), KP(delta_bitmap), KP(ctx->tmp_allocator_)); + } else { + roaring::api::roaring64_bitmap_t *andnot_bitmap = nullptr; + if (OB_SUCC(ret)) { + TCRLockGuard rd_bitmap_lock_guard(incr_data_->bitmap_rwlock_); + andnot_bitmap = roaring64_bitmap_andnot(gene_bitmap, delta_bitmap); + } + uint64_t bitmap_cnt = roaring64_bitmap_get_cardinality(andnot_bitmap) + i_vids.count(); + roaring::api::roaring64_iterator_t *bitmap_iter = roaring64_iterator_create(andnot_bitmap); + // uint64_t use roaring64_bitmap_to_uint64_array(andnot_bitmap, bitmap_out); + bool is_continue = true; + int index = 0; + int64_t dim = 0; + ObObj *vids = nullptr; + if (OB_FAIL(get_dim(dim))) { + LOG_WARN("failed to get dim.", K(ret)); + } else if (OB_ISNULL(vids = static_cast(ctx->tmp_allocator_->alloc(sizeof(ObObj) * bitmap_cnt)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocator.", K(ret), K(bitmap_cnt)); + } else if (OB_ISNULL(ctx->vec_data_.vectors_ = static_cast(ctx->tmp_allocator_->alloc(sizeof(ObObj) * bitmap_cnt)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocator.", K(ret), K(bitmap_cnt)); + } + + while (OB_SUCC(ret) && is_continue) { + vids[index].reset(); + vids[index++].set_int(roaring64_iterator_value(bitmap_iter)); + is_continue = roaring64_iterator_advance(bitmap_iter); + } + + if (OB_FAIL(ret)) { + } else if (index + i_vids.count() != bitmap_cnt) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid vid iter count.", K(ret), K(index), K(roaring64_bitmap_get_cardinality(andnot_bitmap))); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < i_vids.count() && i + index < bitmap_cnt; i++) { + vids[i + index].reset(); + vids[i + index].set_int(i_vids.at(i)); + } + + ctx->vec_data_.dim_ = dim; + ctx->vec_data_.count_ = bitmap_cnt; + ctx->vec_data_.vids_ = vids; + + 
for (int64_t i = 0; OB_SUCC(ret) && i < bitmap_cnt; i++) { + ctx->vec_data_.vectors_[i].reset(); + } + } + + if (OB_NOT_NULL(bitmap_iter)) { + roaring64_iterator_free(bitmap_iter); + bitmap_iter = nullptr; + } + + if (OB_NOT_NULL(andnot_bitmap)) { + roaring64_bitmap_free(andnot_bitmap); + andnot_bitmap = nullptr; + } + + } + + return ret; +} + +int ObPluginVectorIndexAdaptor::serialize(ObIAllocator *allocator, ObOStreamBuf::CbParam &cb_param, ObOStreamBuf::Callback &cb) +{ + int ret = OB_SUCCESS; + ObVectorIndexSerializer index_seri(*allocator); + int64_t snap_index_size = 0; + if (!snap_data_->is_inited()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("snap index is not init", K(ret)); + } else if (OB_FAIL(obvectorutil::get_index_number(snap_data_->index_, snap_index_size))) { + ret = OB_ERR_VSAG_RETURN_ERROR; + LOG_WARN("failed to get snap index number.", K(ret)); + } else if (snap_index_size == 0) { + // do nothing + LOG_INFO("[vec index] empty snap index, do not need to serialize"); + } else if (OB_FAIL(index_seri.serialize(snap_data_->index_, cb_param, cb))) { + LOG_WARN("serialize index failed.", K(ret)); + } else { + snap_data_->rb_flag_ = true; + } + return ret; +} + +int ObPluginVectorIndexAdaptor::generate_snapshot_valid_bitmap(ObVectorQueryAdaptorResultContext *ctx, + common::ObNewRowIterator *row_iter, + SCN query_scn) +{ + INIT_SUCC(ret); + + + return ret; +} + +int ObPluginVectorIndexAdaptor::merge_and_generate_bitmap(ObVectorQueryAdaptorResultContext *ctx, + roaring::api::roaring64_bitmap_t *&ibitmap, + roaring::api::roaring64_bitmap_t *&dbitmap) +{ + INIT_SUCC(ret); + if (OB_ISNULL(ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid argument.", K(ctx)); + } else if (!is_mem_data_init_atomic(VIRT_BITMAP)) { + roaring::api::roaring64_bitmap_t *insert_map = ctx->bitmaps_->insert_bitmap_; + uint64_t insert_min = roaring64_bitmap_minimum(insert_map); + uint64_t insert_max = roaring64_bitmap_maximum(insert_map); + ibitmap = 
roaring64_bitmap_flip_closed(insert_map, insert_min, insert_max); + dbitmap = ctx->bitmaps_->delete_bitmap_; + roaring64_bitmap_or_inplace(ibitmap, dbitmap); + } else { + roaring::api::roaring64_bitmap_t *insert_map = ctx->bitmaps_->insert_bitmap_; + dbitmap = ctx->bitmaps_->delete_bitmap_; +#ifndef NDEBUG + output_bitmap(insert_map); + output_bitmap(dbitmap); + output_bitmap(vbitmap_data_->bitmap_->insert_bitmap_); + output_bitmap(vbitmap_data_->bitmap_->delete_bitmap_); +#endif + if (OB_SUCC(ret)) { + TCRLockGuard rd_bitmap_lock_guard(vbitmap_data_->bitmap_rwlock_); + roaring64_bitmap_or_inplace(insert_map, vbitmap_data_->bitmap_->insert_bitmap_); + roaring64_bitmap_or_inplace(dbitmap, vbitmap_data_->bitmap_->delete_bitmap_); + } + + uint64_t insert_min = roaring64_bitmap_minimum(insert_map); + uint64_t insert_max = roaring64_bitmap_maximum(insert_map); + ibitmap = roaring64_bitmap_flip_closed(insert_map, insert_min, insert_max); + + roaring64_bitmap_or_inplace(ibitmap, dbitmap); + +#ifndef NDEBUG + output_bitmap(ibitmap); + output_bitmap(dbitmap); +#endif + } + + return ret; +} + +int ObPluginVectorIndexAdaptor::vsag_query_vids(ObVectorQueryAdaptorResultContext *ctx, + ObVectorQueryConditions *query_cond, + int64_t dim, float *query_vector, + ObVectorQueryVidIterator *&vids_iter) +{ + INIT_SUCC(ret); + roaring::api::roaring64_bitmap_t *ibitmap = nullptr; + roaring::api::roaring64_bitmap_t *dbitmap = nullptr; + + int64_t *merge_vids = nullptr; + const int64_t *delta_vids = nullptr; + const int64_t *snap_vids = nullptr; + const float *delta_distances = nullptr; + const float *snap_distances = nullptr; + int64_t delta_res_cnt = 0; + int64_t snap_res_cnt = 0; + + if (OB_FAIL(check_vsag_mem_used())) { + LOG_WARN("failed to check vsag mem used.", K(ret)); + } else if (OB_FAIL(merge_and_generate_bitmap(ctx, ibitmap, dbitmap))) { + LOG_WARN("failed to merge and generate bitmap.", K(ret)); + } + + if (OB_SUCC(ret)) { + TCRLockGuard 
lock_guard(incr_data_->mem_data_rwlock_); + if (OB_FAIL(is_mem_data_init_atomic(VIRT_INC) && + obvectorutil::knn_search(get_incr_index(), + query_vector, + dim, + query_cond->query_limit_, + delta_distances, + delta_vids, + delta_res_cnt, + query_cond->ef_search_, + ibitmap))) { + ret = OB_ERR_VSAG_RETURN_ERROR; + LOG_WARN("knn search delta failed.", K(ret), K(dim)); + } + } + if (OB_SUCC(ret)) { + TCRLockGuard lock_guard(snap_data_->mem_data_rwlock_); + if (OB_FAIL(is_mem_data_init_atomic(VIRT_SNAP) && + obvectorutil::knn_search(get_snap_index(), + query_vector, + dim, + query_cond->query_limit_, + snap_distances, + snap_vids, + snap_res_cnt, + query_cond->ef_search_, + dbitmap))) { + ret = OB_ERR_VSAG_RETURN_ERROR; + LOG_WARN("knn search snap failed.", K(ret), K(dim)); + } + } + if (OB_FAIL(ret)) { + } else { + int64_t actual_res_cnt; + const ObVsagQueryResult delta_data = {delta_res_cnt, delta_vids, delta_distances}; + const ObVsagQueryResult snap_data = {snap_res_cnt, snap_vids, snap_distances}; + uint64_t tmp_result_cnt = delta_res_cnt + snap_res_cnt; + uint64_t max_res_cnt = tmp_result_cnt < query_cond->query_limit_ ? 
tmp_result_cnt : query_cond->query_limit_; + + if (max_res_cnt == 0) { + // when max_res_cnt == 0, it means (snap_res_cnt == 0 && delta_res_cnt == 0), there is no data in table, do not need alloc memory for res_vid_array + actual_res_cnt = 0; + } else if (OB_ISNULL(merge_vids = static_cast(ctx->allocator_->alloc /*can't use tmp allocator here, its final result of query*/ + (sizeof(int64_t) * max_res_cnt)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocator merge vids.", K(ret)); + } else if (OB_FAIL(ObPluginVectorIndexHelper::merge_delta_and_snap_vids(delta_data, snap_data, + query_cond->query_limit_, + actual_res_cnt, merge_vids))) { + LOG_WARN("failed to merge delta and snap vids.", K(ret)); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(vids_iter->init(actual_res_cnt, merge_vids, ctx->allocator_))) { + LOG_WARN("iter init failed.", K(ret), K(actual_res_cnt), K(merge_vids), K(ctx->allocator_)); + } + } + // free in the end + if (OB_NOT_NULL(ibitmap)) { + roaring64_bitmap_free(ibitmap); + ibitmap = nullptr; + } + + if (delta_res_cnt != 0) { + if (delta_distances != nullptr) { + incr_data_->mem_ctx_->Deallocate((void *)delta_distances); + delta_distances = nullptr; + } + + if (delta_vids != nullptr) { + incr_data_->mem_ctx_->Deallocate((void *)delta_vids); + delta_vids = nullptr; + } + } + + if (snap_res_cnt != 0) { + if (snap_distances != nullptr) { + snap_data_->mem_ctx_->Deallocate((void *)snap_distances); + snap_distances = nullptr; + } + + if (snap_vids != nullptr) { + snap_data_->mem_ctx_->Deallocate((void *)snap_vids); + snap_distances = nullptr; + } + } + return ret; +} + +int ObPluginVectorIndexAdaptor::query_result(ObVectorQueryAdaptorResultContext *ctx, + ObVectorQueryConditions *query_cond, + ObVectorQueryVidIterator *&vids_iter) +{ + INIT_SUCC(ret); + vids_iter = nullptr; + int64_t dim = 0; + int64_t *merge_vids = nullptr; + void *iter_buff = nullptr; + float *query_vector; + + if (OB_ISNULL(ctx) || OB_ISNULL(query_cond)) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("get ctx invalid.", K(ret)); + } else if (query_cond->query_limit_ <= 0 || query_cond->query_vector_.empty()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid query limit.", K(ret), K(query_cond->query_limit_)); + } else if (OB_FAIL(get_dim(dim))) { + LOG_WARN("get dim failed.", K(ret)); + } else if (query_cond->query_vector_.length() / sizeof(float) != dim) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get vector objct unexpect.", K(ret), K(query_cond->query_vector_.length()), K(dim)); + } else if (OB_ISNULL(query_vector = reinterpret_cast(query_cond->query_vector_.ptr()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to cast vectors.", K(ret), K(query_cond->query_vector_)); + } else if (OB_FAIL(ctx->is_bitmaps_valid())) { + LOG_WARN("ctx bitmap invalid.", K(ret)); + } else if (OB_ISNULL(iter_buff = ctx->allocator_->alloc(sizeof(ObVectorQueryVidIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocator iter.", K(ret)); + } else if (OB_FALSE_IT(vids_iter = new(iter_buff) ObVectorQueryVidIterator())) { + } else if (ctx->flag_ == PVQP_FIRST) { + if (OB_FAIL(vsag_query_vids(ctx, query_cond, dim, query_vector, vids_iter))) { + LOG_WARN("failed to query vids.", K(ret), K(dim)); + } + + } else if (ctx->flag_ == PVQP_SECOND) { + ObArenaAllocator tmp_allocator("VectorAdaptor", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + if (OB_ISNULL(query_cond->row_iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get snapshot table iter null.", K(ret), KP(query_cond)); + } else if (OB_FAIL(try_init_mem_data(VIRT_SNAP))) { + LOG_WARN("try init snap mem data failed.", K(ret)); + } else { + ObHNSWDeserializeCallback::CbParam param; + param.iter_ = query_cond->row_iter_; + param.allocator_ = &tmp_allocator; + + ObHNSWDeserializeCallback callback; + ObIStreamBuf::Callback cb = callback; + + ObVectorIndexSerializer index_seri(tmp_allocator); + if (OB_FAIL(index_seri.deserialize(snap_data_->index_, param, cb))) { + LOG_WARN("serialize index 
failed.", K(ret)); + } else { + close_snap_data_rb_flag(); + } + } + + if (OB_SUCC(ret) && OB_FAIL(vsag_query_vids(ctx, query_cond, dim, query_vector, vids_iter))) { + LOG_WARN("failed to query vids.", K(ret), K(dim)); + } + } + + return ret; +} + +int ObPluginVectorIndexAdaptor::cast_roaringbitmap_to_stdmap(const roaring::api::roaring64_bitmap_t *bitmap, + std::map &mymap) +{ + INIT_SUCC(ret); + uint64_t bitmap_cnt = roaring64_bitmap_get_cardinality(bitmap); + ObArenaAllocator tmp_allocator("VectorAdaptor", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + uint64_t *buf = nullptr; + + if (bitmap_cnt == 0) { + // do nothing + } else if (OB_ISNULL(bitmap)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get bitmap invalid.", K(ret)); + } else if (OB_ISNULL(buf = static_cast(tmp_allocator.alloc(sizeof(uint64_t) * bitmap_cnt)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc buf.", K(ret)); + } else { + roaring::api::roaring64_iterator_t *roaring_iter = roaring64_iterator_create(bitmap); + uint64_t ele_cnt = roaring64_iterator_read(roaring_iter, buf, bitmap_cnt); + for (int i = 0; i < ele_cnt; i++) { + mymap[buf[i]] = false; + } + } + return ret; +} + +int ObPluginVectorIndexAdaptor::set_tablet_id(ObVectorIndexRecordType type, ObTabletID tablet_id) +{ + int ret = OB_SUCCESS; + if (tablet_id.is_valid()) { + ObTabletID *tablet_to_modify = nullptr; + + if (type == VIRT_INC) { + tablet_to_modify = &inc_tablet_id_; + } else if (type == VIRT_BITMAP) { + tablet_to_modify = &vbitmap_tablet_id_; + } else if (type == VIRT_SNAP) { + tablet_to_modify = &snapshot_tablet_id_; + } else if (type == VIRT_DATA) { + tablet_to_modify = &data_tablet_id_; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN( "invalid type", KR(ret), K(type), K(tablet_id), K(*this)); + } + + if (OB_SUCC(ret)) { + if (tablet_to_modify->is_valid() && *tablet_to_modify != tablet_id) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet id already existed", KR(ret), K(type), K(tablet_id), K(*this)); + } else { + 
*tablet_to_modify = tablet_id; + } + } + } + return ret; +} + +int ObPluginVectorIndexAdaptor::set_table_id(ObVectorIndexRecordType type, uint64_t table_id) +{ + int ret = OB_SUCCESS; + if (table_id != OB_INVALID_ID) { + uint64_t *table_id_to_modify = nullptr; + + if (type == VIRT_INC) { + table_id_to_modify = &inc_table_id_; + } else if (type == VIRT_BITMAP) { + table_id_to_modify = &vbitmap_table_id_; + } else if ( type == VIRT_SNAP) { + table_id_to_modify = &snapshot_table_id_; + } else if (type == VIRT_DATA) { + table_id_to_modify = &data_table_id_; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid type", KR(ret), K(type), K(table_id), K(*this)); + } + + if (OB_SUCC(ret)) { + if (*table_id_to_modify != OB_INVALID_ID && *table_id_to_modify != table_id) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table id already existed", KR(ret), K(type), K(table_id), K(*this)); + } else { + *table_id_to_modify = table_id; + } + } + } + return ret; +} + +int ObPluginVectorIndexAdaptor::set_index_identity(ObString &index_identity) +{ + int ret = OB_SUCCESS; + if (!index_identity_.empty() && index_identity_ == index_identity) { + // do nothing + LOG_INFO("try to change same vector index identity", K(index_identity), K(*this)); + } else if (index_identity.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("vector index identity is empty", KR(ret), K(*this)); + } else if (OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null allocator to set vector index identity ", KR(ret), K(*this)); + } else { + if (!index_identity_.empty()) { + allocator_->free(index_identity_.ptr()); + index_identity_.reset(); + } + if (OB_FAIL(ob_write_string(*allocator_, index_identity, index_identity_))) { + LOG_WARN("fail set vector index identity ", KR(ret), K(*this)); + } else { + LOG_INFO("change vector index identity success", K(index_identity), K(*this)); + } + } + return ret; +} + +void ObPluginVectorIndexAdaptor::set_vid_rowkey_info(ObVectorIndexSharedTableInfo &info) +{ + 
rowkey_vid_tablet_id_ = info.rowkey_vid_tablet_id_; + vid_rowkey_tablet_id_ = info.vid_rowkey_tablet_id_; + rowkey_vid_table_id_ = info.rowkey_vid_table_id_; + vid_rowkey_table_id_ = info.vid_rowkey_table_id_; + data_table_id_ = info.data_table_id_; +} + +// use init flag instead? +bool ObPluginVectorIndexAdaptor::is_complete() +{ + return is_inc_tablet_valid() + && is_vbitmap_tablet_valid() + && is_snap_tablet_valid() + && is_data_tablet_valid() + && (vbitmap_table_id_ != OB_INVALID_ID) + && (inc_table_id_ != OB_INVALID_ID) + && (snapshot_table_id_ != OB_INVALID_ID); +} + +static int ref_memdata(ObVectorIndexMemData *&dst_mem_data, ObVectorIndexMemData *&src_mem_data) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(src_mem_data)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null input", KP(src_mem_data), KR(ret)); + } else { + dst_mem_data = src_mem_data; + dst_mem_data->inc_ref(); + } + return ret; +} + +int ObPluginVectorIndexAdaptor::merge_mem_data_(ObVectorIndexRecordType type, + ObPluginVectorIndexAdaptor *src_adapter, + ObVectorIndexMemData *&src_mem_data, + ObVectorIndexMemData *&dst_mem_data) +{ + // ToDo: may need lock or atomic access when replace dst mem data! 
+ int ret = OB_SUCCESS; + bool is_same_mem_data = false; + if (OB_ISNULL(src_adapter) || OB_ISNULL(src_mem_data)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null input", KP(src_adapter), KP(src_mem_data), KR(ret)); + } else if ((this == src_adapter) || (src_mem_data == dst_mem_data)) { + is_same_mem_data = true; + } else if ((OB_NOT_NULL(dst_mem_data) && dst_mem_data->is_inited()) + && src_mem_data->is_inited()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("conflict use of src_mem_data", K(type), KPC(src_mem_data), KPC(dst_mem_data), K(lbt())); + } + + if (OB_FAIL(ret) || is_same_mem_data) { + // do nothing + } else if (src_mem_data->is_inited()) { + if (OB_NOT_NULL(dst_mem_data) && OB_FAIL(try_free_memdata_resource(type, dst_mem_data, allocator_))) { + LOG_WARN("failed to free mem data resource", KR(ret), K(type), KPC(dst_mem_data)); + } else { + dst_mem_data = nullptr; + } + (void)ref_memdata(dst_mem_data, src_mem_data); + } else if (OB_NOT_NULL(dst_mem_data) && dst_mem_data->is_inited()) { + // do nothing + } else { + // both mem data not used, decide by type + if (((type == VIRT_INC) && (src_adapter->get_create_type() == CreateTypeInc)) + || ((type == VIRT_BITMAP) && (src_adapter->get_create_type() == CreateTypeBitMap)) + || ((type == VIRT_SNAP) && (src_adapter->get_create_type() == CreateTypeSnap))) { + if (OB_NOT_NULL(dst_mem_data) && OB_FAIL(try_free_memdata_resource(type, dst_mem_data, allocator_))) { + LOG_WARN("failed to free mem data resource", KR(ret), K(type), KPC(dst_mem_data)); + } else { + (void)ref_memdata(dst_mem_data, src_mem_data); + } + } else if (OB_ISNULL(dst_mem_data)) { + // when full partial merge to complete + (void)ref_memdata(dst_mem_data, src_mem_data); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid type", K(type), KPC(src_adapter), KPC(dst_mem_data), KR(ret)); + } + } + return ret; +} + +// if merge failed, caller should release resources +int ObPluginVectorIndexAdaptor::merge_parital_index_adapter(ObPluginVectorIndexAdaptor 
*partial_idx_adpt) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(partial_idx_adpt)) { + // do nothing + } else if (partial_idx_adpt == this) { + // merge self, do nothing + } else { + if (partial_idx_adpt->is_inc_tablet_valid()) { + if (OB_FAIL(set_tablet_id(VIRT_INC, partial_idx_adpt->get_inc_tablet_id()))) { + LOG_WARN("partial vector index adapter not valid", K(partial_idx_adpt), K(*this), KR(ret)); + } else if (OB_FAIL(set_table_id(VIRT_INC, partial_idx_adpt->get_inc_table_id()))) { + LOG_WARN("partial vector index adapter not valid", K(partial_idx_adpt), K(*this), KR(ret)); + } else if (OB_FAIL(set_tablet_id(VIRT_DATA, partial_idx_adpt->get_data_tablet_id()))) { + LOG_WARN("partial vector index adapter not valid", K(partial_idx_adpt), K(*this), KR(ret)); + } else if (OB_FAIL(merge_mem_data_(VIRT_INC, partial_idx_adpt, partial_idx_adpt->incr_data_, incr_data_))){ + LOG_WARN("partial vector index adapter not valid", K(partial_idx_adpt), K(*this), KR(ret)); + } + } + + if (OB_FAIL(ret)) { + } else if (partial_idx_adpt->is_vbitmap_tablet_valid()) { + if (OB_FAIL(set_tablet_id(VIRT_BITMAP, partial_idx_adpt->get_vbitmap_tablet_id()))) { + LOG_WARN("partial vector index adapter not valid", K(partial_idx_adpt), K(*this), KR(ret)); + } else if (OB_FAIL(set_table_id(VIRT_BITMAP, partial_idx_adpt->get_vbitmap_table_id()))) { + LOG_WARN("partial vector index adapter not valid", K(partial_idx_adpt), K(*this), KR(ret)); + } else if (OB_FAIL(set_tablet_id(VIRT_DATA, partial_idx_adpt->get_data_tablet_id()))) { + LOG_WARN("partial vector index adapter not valid", K(partial_idx_adpt), K(*this), KR(ret)); + } else if (OB_FAIL(merge_mem_data_(VIRT_BITMAP, partial_idx_adpt, partial_idx_adpt->vbitmap_data_, vbitmap_data_))){ + LOG_WARN("partial vector index adapter not valid", K(partial_idx_adpt), K(*this), KR(ret)); + } + } + + if (OB_FAIL(ret)) { + } else if (partial_idx_adpt->is_snap_tablet_valid()) { + if (OB_FAIL(set_tablet_id(VIRT_SNAP, 
partial_idx_adpt->get_snap_tablet_id()))) { + LOG_WARN("partial vector index adapter not valid", K(partial_idx_adpt), K(*this), KR(ret)); + } else if (OB_FAIL(set_table_id(VIRT_SNAP, partial_idx_adpt->get_snapshot_table_id()))) { + LOG_WARN("partial vector index adapter not valid", K(partial_idx_adpt), K(*this), KR(ret)); + } else if (OB_FAIL(set_tablet_id(VIRT_DATA, partial_idx_adpt->get_data_tablet_id()))) { + LOG_WARN("partial vector index adapter not valid", K(partial_idx_adpt), K(*this), KR(ret)); + } else if (OB_FAIL(merge_mem_data_(VIRT_SNAP, partial_idx_adpt, partial_idx_adpt->snap_data_, snap_data_))){ + LOG_WARN("partial vector index adapter not valid", K(partial_idx_adpt), K(*this), KR(ret)); + } + } + + if (OB_SUCC(ret) && !partial_idx_adpt->get_index_identity().empty()) { + if (OB_FAIL(set_index_identity(partial_idx_adpt->get_index_identity()))) { + LOG_WARN("failed to set index identity", KR(ret), K(*this), KPC(partial_idx_adpt)); + } + } + + if (OB_SUCC(ret) && OB_NOT_NULL(partial_idx_adpt->all_vsag_use_mem_)) { + all_vsag_use_mem_ = partial_idx_adpt->all_vsag_use_mem_; + } + + if (OB_SUCC(ret) + && OB_ISNULL(algo_data_) + && OB_NOT_NULL(partial_idx_adpt->algo_data_)) { + // just replace for simple, fix memory later + ObVectorIndexHNSWParam *hnsw_param = nullptr; + if (OB_ISNULL(get_allocator())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("adaptor allocator invalid.", K(ret)); + } else if (OB_ISNULL(hnsw_param = static_cast + (get_allocator()->alloc(sizeof(ObVectorIndexHNSWParam))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate mem.", K(ret)); + } else { + *hnsw_param = *(ObVectorIndexHNSWParam *)partial_idx_adpt->algo_data_; + algo_data_ = hnsw_param; + type_ = partial_idx_adpt->type_; + } + } + } + return ret; +} + +void ObPluginVectorIndexAdaptor::inc_ref() +{ + int64_t ref_count = ATOMIC_AAF(&ref_cnt_, 1); + // LOG_INFO("inc ref count", K(ref_count), KP(this), KPC(this), K(lbt())); // remove later +} + +bool 
ObPluginVectorIndexAdaptor::dec_ref_and_check_release() +{ + int64_t ref_count = ATOMIC_SAF(&ref_cnt_, 1); + // LOG_INFO("dec ref count", K(ref_count), KP(this), KPC(this), K(lbt())); + return (ref_count == 0); +} + +int ObPluginVectorIndexAdaptor::check_need_sync_to_follower(bool &need_sync) +{ + int ret = OB_SUCCESS; + need_sync = false; + + if (!is_complete()) { + // do nothing + ret = OB_INVALID_ARGUMENT; + LOG_WARN("no complete adapter need not sync memdata", K(*this), KR(ret)); + } else { + // no get_index_number interface currently + int64_t current_incr_count = 0; + if (OB_NOT_NULL(get_incr_index())) { + if (OB_FAIL(obvectorutil::get_index_number(get_incr_index(), current_incr_count))) { + ret = OB_ERR_VSAG_RETURN_ERROR; + LOG_WARN("fail to get incr index number", K(ret)); + } + } + + int64_t current_bitmap_count = 0; + + if (OB_NOT_NULL(get_vbitmap_dbitmap())) { + TCRLockGuard rd_bitmap_lock_guard(vbitmap_data_->bitmap_rwlock_); + current_bitmap_count += roaring64_bitmap_get_cardinality(get_vbitmap_dbitmap()); + } + if (OB_NOT_NULL(get_vbitmap_ibitmap())) { + TCRLockGuard rd_bitmap_lock_guard(vbitmap_data_->bitmap_rwlock_); + current_bitmap_count += roaring64_bitmap_get_cardinality(get_vbitmap_ibitmap()); + } + + int64_t current_snapshot_count = 0; + if (OB_NOT_NULL(get_snap_index())) { + if (OB_FAIL(obvectorutil::get_index_number(get_snap_index(), current_snapshot_count))) { + ret = OB_ERR_VSAG_RETURN_ERROR; + LOG_WARN("fail to get snap index number", K(ret)); + } + } + + if (current_incr_count > follower_sync_statistics_.incr_count_ + VEC_INDEX_INCR_DATA_SYNC_THRESHOLD + || current_bitmap_count > follower_sync_statistics_.vbitmap_count_ + VEC_INDEX_INCR_DATA_SYNC_THRESHOLD + || current_snapshot_count != follower_sync_statistics_.snap_count_) { // use scn_ in memdata for compare + need_sync = true; + LOG_INFO("need sync to follower", + K(follower_sync_statistics_), K(current_incr_count), K(current_bitmap_count), + K(current_snapshot_count), KPC(this)); + 
} else { + LOG_DEBUG("not need sync to follower", + K(follower_sync_statistics_), K(current_incr_count), K(current_bitmap_count), + K(current_snapshot_count), KPC(this)); + } + + if (need_sync) { // if need sync, update statistics, otherwise use current statistics and check next loop + follower_sync_statistics_.incr_count_ = current_incr_count; + follower_sync_statistics_.vbitmap_count_ = current_bitmap_count; + follower_sync_statistics_.snap_count_ = current_snapshot_count; + } + } + return ret; +} + +// debug function +void ObPluginVectorIndexAdaptor::output_bitmap(roaring::api::roaring64_bitmap_t *bitmap) +{ + ObArenaAllocator tmp_allocator; + INIT_SUCC(ret); + roaring::api::roaring64_iterator_t *bitmap_iter = roaring64_iterator_create(bitmap); + uint64_t bitmap_cnt = roaring64_bitmap_get_cardinality(bitmap); + if (bitmap_cnt > 0) { + uint64_t *vids = static_cast(tmp_allocator.alloc(sizeof(uint64_t) * bitmap_cnt)); + int index = 0; + bool is_continue = bitmap_cnt != 0; + while (is_continue) { + vids[index++] = roaring64_iterator_value(bitmap_iter); + is_continue = roaring64_iterator_advance(bitmap_iter); + } + LOG_INFO("BITMAP_INFO:", K(ret), K(index), KP(vids), K(vids[0]), K(vids[index - 1])); + int a = 0; + } + + roaring64_iterator_free(bitmap_iter); + tmp_allocator.reset(); +} + +int ObPluginVectorIndexAdaptor::check_vsag_mem_used() +{ + INIT_SUCC(ret); + int64_t mem_size = 0; + // There is no need to worry about the thread safety of mem_check_cnt_ here, + // because mem_check_cnt_ is used to roughly determine + // whether to perform memory verification and does not require accurate counting. 
+ mem_check_cnt_++; + if (mem_check_cnt_ % 10 == 0) { + mem_check_cnt_ %= 10; + if (OB_FAIL(ObPluginVectorIndexHelper::get_vector_memory_limit_size(MTL_ID(), mem_size))) { + LOG_WARN("failed to get vector mem limit size.", K(ret), K(MTL_ID())); + } else if (ATOMIC_LOAD(all_vsag_use_mem_) > mem_size) { + ret = OB_ERR_VSAG_MEM_LIMIT_EXCEEDED; + LOG_USER_ERROR(OB_ERR_VSAG_MEM_LIMIT_EXCEEDED, (int)mem_size >> 20); + LOG_WARN("Memory usage exceeds user limit.", K(ret)); + } + } + + return ret; +} + +int ObPluginVectorIndexAdaptor::get_incr_vsag_mem_used() +{ + int64_t size = 0; + if (incr_data_->is_inited()) { + size = incr_data_->mem_ctx_->used(); + } + return size; +} + +int ObPluginVectorIndexAdaptor::get_incr_vsag_mem_hold() +{ + int64_t size = 0; + if (incr_data_->is_inited()) { + size = incr_data_->mem_ctx_->hold(); + } + return size; +} + +int ObPluginVectorIndexAdaptor::get_snap_vsag_mem_used() +{ + int64_t size = 0; + if (snap_data_->is_inited()) { + size = snap_data_->mem_ctx_->used(); + } + return size; +} + +int ObPluginVectorIndexAdaptor::get_snap_vsag_mem_hold() +{ + int64_t size = 0; + if (snap_data_->is_inited()) { + size = snap_data_->mem_ctx_->hold(); + } + return size; +} + +void *ObVsagMemContext::Allocate(size_t size) +{ + void *ret_ptr = nullptr; + + if (size != 0) { + int64_t actual_size = MEM_PTR_HEAD_SIZE + size; + + void *ptr = mem_context_->get_malloc_allocator().alloc(actual_size); + if (OB_NOT_NULL(ptr)) { + ATOMIC_AAF(all_vsag_use_mem_, size); + + *(int64_t*)ptr = actual_size; + ret_ptr = (char*)ptr + MEM_PTR_HEAD_SIZE; + } + } + + return ret_ptr; +} + +void ObVsagMemContext::Deallocate(void* p) +{ + if (OB_NOT_NULL(p)) { + void *size_ptr = (char*)p - MEM_PTR_HEAD_SIZE; + int64_t size = *(int64_t *)size_ptr; + + ATOMIC_SAF(all_vsag_use_mem_, size); + mem_context_->get_malloc_allocator().free((char*)p - MEM_PTR_HEAD_SIZE); + p = nullptr; + } +} + +void *ObVsagMemContext::Reallocate(void* p, size_t size) +{ + void *new_ptr = nullptr; + if 
(size == 0) { + if (OB_NOT_NULL(p)) { + Deallocate(p); + p = nullptr; + } + } else if (OB_ISNULL(p)) { + new_ptr = Allocate(size); + } else { + void *size_ptr = (char*)p - MEM_PTR_HEAD_SIZE; + int64_t old_size = *(int64_t *)size_ptr - MEM_PTR_HEAD_SIZE; + if (old_size >= size) { + new_ptr = p; + } else { + new_ptr = Allocate(size); + if (OB_ISNULL(new_ptr) || OB_ISNULL(p)) { + } else { + MEMCPY(new_ptr, p, old_size); + Deallocate(p); + p = nullptr; + } + } + } + return new_ptr; +} + +int ObVsagMemContext::init(lib::MemoryContext &parent_mem_context, uint64_t *all_vsag_use_mem) +{ + INIT_SUCC(ret); + lib::ContextParam param; + ObMemAttr attr(MTL_ID(), "VIndexVsagADP"); + SET_IGNORE_MEM_VERSION(attr); + param.set_mem_attr(attr) + .set_page_size(OB_MALLOC_MIDDLE_BLOCK_SIZE) + .set_parallel(4) + .set_properties(lib::ALLOC_THREAD_SAFE | lib::RETURN_MALLOC_DEFAULT); + if (OB_FAIL(parent_mem_context->CREATE_CONTEXT(mem_context_, param))) { + LOG_WARN("create memory entity failed", K(ret)); + } else { + all_vsag_use_mem_ = all_vsag_use_mem; + } + + return ret; +} + +}; +}; diff --git a/src/share/vector_index/ob_plugin_vector_index_adaptor.h b/src/share/vector_index/ob_plugin_vector_index_adaptor.h new file mode 100644 index 0000000000..e59e9a5890 --- /dev/null +++ b/src/share/vector_index/ob_plugin_vector_index_adaptor.h @@ -0,0 +1,642 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+
+#ifndef OCEANBASE_SHARE_PLUGIN_VECTOR_INDEX_ADAPTOR_H_
+#define OCEANBASE_SHARE_PLUGIN_VECTOR_INDEX_ADAPTOR_H_
+
+#include "share/scn.h"
+#include "share/datum/ob_datum.h"
+#include "roaring/roaring64.h"
+#include "common/object/ob_obj_type.h"
+#include "common/row/ob_row_iterator.h"
+#include "share/vector_index/ob_plugin_vector_index_util.h"
+#include "storage/ob_i_store.h"
+#include "share/ob_ls_id.h"
+#include "share/rc/ob_tenant_base.h"
+#include "lib/oblog/ob_log_module.h"
+#include "share/vector_index/ob_plugin_vector_index_serialize.h"
+
+namespace oceanbase
+{
+namespace share
+{
+struct ObPluginVectorIndexTaskCtx;
+class ObVsagMemContext;
+
+// Flat description of one vector index (table ids, tablet ids and two text buffers),
+// filled by ObPluginVectorIndexAdaptor::fill_vector_index_info() for the virtual table.
+struct ObVectorIndexInfo
+{
+public:
+  ObVectorIndexInfo();
+  ~ObVectorIndexInfo() { reset(); }
+  void reset();
+  static const int64_t OB_VECTOR_INDEX_STATISTICS_SIZE = 2048;
+  static const int64_t OB_VECTOR_INDEX_SYNC_INFO_SIZE = 1024;
+  TO_STRING_KV(K_(ls_id),
+               K_(rowkey_vid_table_id), K_(vid_rowkey_table_id), K_(inc_index_table_id),
+               K_(vbitmap_table_id), K_(snapshot_index_table_id), K_(data_table_id),
+               K_(rowkey_vid_tablet_id), K_(vid_rowkey_tablet_id), K_(inc_index_tablet_id),
+               K_(vbitmap_tablet_id), K_(snapshot_index_tablet_id), K_(data_tablet_id),
+               K_(statistics), K_(sync_info));
+public:
+  int64_t ls_id_;
+  // table_id
+  int64_t rowkey_vid_table_id_;
+  int64_t vid_rowkey_table_id_;
+  int64_t inc_index_table_id_;
+  int64_t vbitmap_table_id_;
+  int64_t snapshot_index_table_id_;
+  int64_t data_table_id_;
+  // tablet_id
+  int64_t rowkey_vid_tablet_id_;
+  int64_t vid_rowkey_tablet_id_;
+  int64_t inc_index_tablet_id_;
+  int64_t vbitmap_tablet_id_;
+  int64_t snapshot_index_tablet_id_;
+  int64_t data_tablet_id_;
+  char statistics_[OB_VECTOR_INDEX_STATISTICS_SIZE];
+  char sync_info_[OB_VECTOR_INDEX_SYNC_INFO_SIZE];
+};
+
+// Distance algorithm; the enumerator values index VEC_INDEX_ALGTH ("l2", "ip", "cos").
+enum ObVectorIndexDistAlgorithm
+{
+  VIDA_L2 = 0,
+  VIDA_IP = 1,
+  VIDA_COS = 2,
+  VIDA_MAX
+};
+
+enum ObVectorIndexAlgorithmLib
+{
+  VIAL_VSAG = 0,
+  VIAL_MAX
+};
+
+enum ObVectorIndexAlgorithmType
+{
+  VIAT_HNSW = 0,
+  VIAT_MAX
+};
+
+struct ObVectorIndexAlgorithmHeader
+{
+  ObVectorIndexAlgorithmType type_;
+  OB_UNIS_VERSION(1);
+};
+
+// HNSW index parameters. A default-constructed object is in the same state as after
+// reset(): every enum is at its *_MAX sentinel and every numeric field is 0.
+struct ObVectorIndexHNSWParam
+{
+  ObVectorIndexHNSWParam() :
+    type_(VIAT_MAX), lib_(VIAL_MAX), dist_algorithm_(VIDA_MAX),  // dist_algorithm_ must not be left indeterminate
+    dim_(0), m_(0), ef_construction_(0), ef_search_(0)
+  {}
+  void reset() {
+    type_ = VIAT_MAX;
+    lib_ = VIAL_MAX;
+    dist_algorithm_ = VIDA_MAX;
+    dim_ = 0;
+    m_ = 0;
+    ef_construction_ = 0;
+    ef_search_ = 0;
+  };
+  ObVectorIndexAlgorithmType type_;
+  ObVectorIndexAlgorithmLib lib_;
+  ObVectorIndexDistAlgorithm dist_algorithm_;
+  int64_t dim_;
+  int64_t m_;
+  int64_t ef_construction_;
+  int64_t ef_search_;
+  OB_UNIS_VERSION(1);
+public:
+  TO_STRING_KV(K_(type), K_(lib), K_(dist_algorithm), K_(dim), K_(m), K_(ef_construction), K_(ef_search));
+};
+
+enum ObVectorIndexRecordType
+{
+  VIRT_INC, // increment index
+  VIRT_BITMAP,
+  VIRT_SNAP, // snapshot index
+  VIRT_DATA, // data tablet/table
+  VIRT_MAX
+};
+
+enum ObAdapterCreateType
+{
+  CreateTypeInc = 0,
+  CreateTypeBitMap,
+  CreateTypeSnap,
+  CreateTypeFullPartial,
+  CreateTypeComplete,
+  CreateTypeMax
+};
+
+// Pair of roaring bitmaps (inserted / deleted ids). Plain aggregate: the pointers are
+// not initialized by this struct itself.
+struct ObVectorIndexRoaringBitMap
+{
+  TO_STRING_KV(KP_(insert_bitmap), KP_(delete_bitmap));
+  roaring::api::roaring64_bitmap_t *insert_bitmap_;
+  roaring::api::roaring64_bitmap_t *delete_bitmap_;
+};
+
+enum PluginVectorQueryResStatus
+{
+  PVQ_START,
+  PVQ_WAIT,
+  PVQ_LACK_SCN,
+  PVQ_OK, // ok
+  PVQ_COM_DATA,
+  PVQ_INVALID_SCN,
+  PVQ_MAX
+};
+
+enum ObVectorQueryProcessFlag
+{
+  PVQP_FIRST,
+  PVQP_SECOND,
+  PVQP_MAX,
+};
+
+struct ObVectorParamData
+{
+  int64_t dim_;
+  int64_t count_;
+  ObObj *vectors_;
+  ObObj *vids_;
+};
+
+// Per-query working state shared with ObPluginVectorIndexAdaptor (a friend class):
+// current status/flag, bitmaps, the vector/vid arrays and the two allocators.
+class ObVectorQueryAdaptorResultContext {
+public:
+  friend class ObPluginVectorIndexAdaptor;
+  ObVectorQueryAdaptorResultContext(ObIAllocator *allocator, ObIAllocator *tmp_allocator)
+    : status_(PVQ_START),
+      flag_(PVQP_MAX),
+      bitmaps_(nullptr),
+      vec_data_(),
+      allocator_(allocator),
+      tmp_allocator_(tmp_allocator) {};
+  ~ObVectorQueryAdaptorResultContext();
+  int init_bitmaps();
+  int is_bitmaps_valid();
+  ObObj *get_vids() { return vec_data_.vids_; }
+  ObObj *get_vectors() { return vec_data_.vectors_; }
+  int64_t get_dim() { return vec_data_.dim_; }
+  int64_t get_count() { return vec_data_.count_; }
+  PluginVectorQueryResStatus get_status() { return status_; }
+  ObVectorQueryProcessFlag get_flag() { return flag_; }
+  ObIAllocator *get_allocator() { return allocator_; }
+  ObIAllocator *get_tmp_allocator() { return tmp_allocator_; }
+  int set_vector(int64_t index, const char *ptr, common::ObString::obstr_size_t size);
+  void set_vectors(ObObj *vectors) { vec_data_.vectors_ = vectors; }
+
+private:
+  PluginVectorQueryResStatus status_;
+  ObVectorQueryProcessFlag flag_;
+  ObVectorIndexRoaringBitMap *bitmaps_;
+  ObVectorParamData vec_data_;
+  ObIAllocator *allocator_;
+  ObIAllocator *tmp_allocator_;
+};
+
+// Inputs of one vector query. No constructor: members are whatever the creator assigns.
+struct ObVectorQueryConditions {
+  uint32_t query_limit_;
+  bool query_order_; // true: asc, false: desc
+  int64_t ef_search_;
+  ObString query_vector_;
+  SCN query_scn_;
+  common::ObNewRowIterator *row_iter_; // index_snapshot_data_table iter
+};
+
+// In-memory data of one index part (index handle, bitmap, memory context) together
+// with its locks and a reference count.
+struct ObVectorIndexMemData
+{
+  ObVectorIndexMemData()
+    : is_init_(false),
+      rb_flag_(true),
+      mem_data_rwlock_(),
+      bitmap_rwlock_(),
+      scn_(),
+      ref_cnt_(0),
+      index_(nullptr),
+      bitmap_(nullptr),
+      mem_ctx_(nullptr) {}
+
+public:
+  TO_STRING_KV(K(rb_flag_), K_(is_init), K_(scn), K_(ref_cnt), KP_(index), KPC_(bitmap), KP_(mem_ctx));
+  void free_resource(ObIAllocator *allocator_);
+  bool is_inited() const { return is_init_; }
+  void set_inited() { is_init_ = true; }
+  void inc_ref()
+  {
+    ATOMIC_INC(&ref_cnt_);
+    // OB_LOG(INFO, "inc ref count", K(ref_cnt_), KP(this), KPC(this), K(lbt())); // remove later
+  }
+  // Drops one reference; returns true when the count reached zero.
+  bool dec_ref_and_check_release()
+  {
+    int64_t ref_count = ATOMIC_SAF(&ref_cnt_, 1);
+    // OB_LOG(INFO,"dec ref count", K(ref_count), KP(this), KPC(this), K(lbt())); // remove later
+    return (ref_count == 0);
+  }
+
+public:
+  bool is_init_;
+  bool rb_flag_;
+  TCRWLock mem_data_rwlock_;
+  TCRWLock bitmap_rwlock_;
+  SCN scn_;
+  uint64_t ref_cnt_;
+  void *index_;
+  ObVectorIndexRoaringBitMap *bitmap_;
+  ObVsagMemContext *mem_ctx_;
+  // used for memdata exchange between adaptors
+};
+
+// Counters kept per adapter for deciding whether a sync to followers is needed.
+struct ObVectorIndexFollowerSyncStatic
+{
+public:
+  ObVectorIndexFollowerSyncStatic()
+    : incr_count_(0),
+      vbitmap_count_(0),
+      snap_count_(0),
+      sync_count_(0),
+      sync_fail_(0),
+      idle_count_(0)
+  {}
+  void reset() {
+    incr_count_ = 0;
+    vbitmap_count_ = 0;
+    snap_count_ = 0;
+    sync_count_ = 0;
+    sync_fail_ = 0;
+    idle_count_ = 0;
+  }
+  TO_STRING_KV(K_(incr_count), K_(vbitmap_count), K_(snap_count),
+               K_(sync_count), K_(sync_fail), K_(idle_count));
+  int64_t incr_count_;
+  int64_t vbitmap_count_;
+  int64_t snap_count_;
+
+  int64_t sync_count_;
+  int64_t sync_fail_;
+  int64_t idle_count_; // loops not receive sync
+};
+
+// Ids of the rowkey-vid / vid-rowkey tables and tablets that belong to one data table.
+struct ObVectorIndexSharedTableInfo
+{
+  ObVectorIndexSharedTableInfo()
+    : rowkey_vid_table_id_(OB_INVALID_ID),
+      vid_rowkey_table_id_(OB_INVALID_ID),
+      data_table_id_(OB_INVALID_ID),
+      rowkey_vid_tablet_id_(),
+      vid_rowkey_tablet_id_()
+  {}
+  // Valid only when all three table ids and both tablet ids are set.
+  bool is_valid()
+  {
+    return rowkey_vid_table_id_ != OB_INVALID_ID
+           && vid_rowkey_table_id_ != OB_INVALID_ID
+           && data_table_id_ != OB_INVALID_ID
+           && rowkey_vid_tablet_id_.is_valid()
+           && vid_rowkey_tablet_id_.is_valid();
+  }
+
+  TO_STRING_KV(K_(rowkey_vid_table_id),
+               K_(vid_rowkey_table_id),
+               K_(rowkey_vid_tablet_id),
+               K_(vid_rowkey_tablet_id),
+               K_(data_table_id));
+
+  uint64_t rowkey_vid_table_id_;
+  uint64_t vid_rowkey_table_id_;
+  uint64_t data_table_id_;
+  ObTabletID rowkey_vid_tablet_id_;
+  ObTabletID vid_rowkey_tablet_id_;
+};
+
+// Adapter between the storage-side vector index tables/tablets and the in-memory
+// index data (incremental, bitmap and snapshot parts).
+class ObPluginVectorIndexAdaptor
+{
+public:
+  friend class ObVsagMemContext;
+  ObPluginVectorIndexAdaptor(common::ObIAllocator *allocator, lib::MemoryContext &entity);
+  ~ObPluginVectorIndexAdaptor();
+
+  int init(ObString init_str, int64_t dim, lib::MemoryContext &parent_mem_ctx, uint64_t *all_vsag_use_mem);
+  // only used for background maintenance handle aux table no.4 / 5 before get index aux table no.3
+  int init(lib::MemoryContext &parent_mem_ctx, uint64_t *all_vsag_use_mem);
+  int set_param(ObString init_str, int64_t dim);
+  int get_index_type() { return type_; };
+
+  // -- start: for debugging only
+  void init_incr_tablet() {inc_tablet_id_ = ObTabletID(common::ObTabletID::MIN_VALID_TABLET_ID); }
+  // -- end: for debugging only
+
+  bool is_snap_tablet_valid() { return snapshot_tablet_id_.is_valid(); }
+  bool is_inc_tablet_valid() { return inc_tablet_id_.is_valid(); }
+  bool is_vbitmap_tablet_valid() { return vbitmap_tablet_id_.is_valid(); }
+  bool is_data_tablet_valid() { return data_tablet_id_.is_valid(); }
+  bool is_vid_rowkey_info_valid() { return rowkey_vid_table_id_ != OB_INVALID_ID && rowkey_vid_tablet_id_.is_valid(); }
+
+  ObTabletID& get_inc_tablet_id() { return inc_tablet_id_; }
+  ObTabletID& get_vbitmap_tablet_id() { return vbitmap_tablet_id_; }
+  ObTabletID& get_snap_tablet_id() { return snapshot_tablet_id_; }
+  ObTabletID& get_data_tablet_id() { return data_tablet_id_; }
+  ObTabletID& get_rowkey_vid_tablet_id() { return rowkey_vid_tablet_id_; }
+  ObTabletID& get_vid_rowkey_tablet_id() { return vid_rowkey_tablet_id_; }
+
+  ObVectorIndexMemData *get_incr_data() { return incr_data_; }
+  ObVectorIndexMemData *get_snap_data_() { return snap_data_; }
+  ObVectorIndexMemData *get_vbitmap_data() { return vbitmap_data_; }
+
+  uint64_t get_inc_table_id() { return inc_table_id_; }
+  uint64_t get_vbitmap_table_id() { return vbitmap_table_id_; }
+  uint64_t get_snapshot_table_id() { return snapshot_table_id_; }
+  uint64_t get_data_table_id() { return data_table_id_; }
+  uint64_t get_rowkey_vid_table_id() { return rowkey_vid_table_id_; }
+  uint64_t get_vid_rowkey_table_id() { return vid_rowkey_table_id_; }
+  // Clears rb_flag_ of the snapshot data, but only once that data is initialized.
+  void close_snap_data_rb_flag() {
+    if (is_mem_data_init_atomic(VIRT_SNAP)) {
+      snap_data_->rb_flag_ = false;
+    }
+  }
+
+  ObString &get_index_identity() { return index_identity_; };
+  int set_index_identity(ObString &index_identity);
+
+  // Valid when the data tablet id and at least one index tablet id are valid.
+  bool is_valid() { return (is_inc_tablet_valid() || is_vbitmap_tablet_valid() || is_snap_tablet_valid()) && is_data_tablet_valid(); }
+  bool is_complete();
+
+  void inc_ref();
+  bool dec_ref_and_check_release();
+  void inc_idle() { idle_cnt_++; }
+  void reset_idle() { idle_cnt_ = 0; }
+  // Deprecated once idle_cnt_ exceeds VEC_INDEX_ADAPTER_MAX_IDLE_COUNT.
+  bool is_deprecated() { return idle_cnt_ > VEC_INDEX_ADAPTER_MAX_IDLE_COUNT; }
+  int set_tablet_id(ObVectorIndexRecordType type, ObTabletID tablet_id);
+
+  int set_table_id(ObVectorIndexRecordType type, uint64_t table_id);
+  void set_vid_rowkey_info(ObVectorIndexSharedTableInfo &info);
+
+  int merge_parital_index_adapter(ObPluginVectorIndexAdaptor *partial_index);
+
+  int check_tablet_valid(ObVectorIndexRecordType type);
+
+  int get_dim(int64_t &dim);
+  int get_hnsw_param(ObVectorIndexHNSWParam *&param);
+
+  // for virtual table
+  int fill_vector_index_info(ObVectorIndexInfo &info);
+
+  const roaring::api::roaring64_bitmap_t *get_incr_ibitmap();
+  const roaring::api::roaring64_bitmap_t *get_vbitmap_ibitmap();
+  const roaring::api::roaring64_bitmap_t *get_vbitmap_dbitmap();
+
+  // VSAG ADD
+  int insert_rows(blocksstable::ObDatumRow *rows,
+                  const int64_t vid_idx,
+                  const int64_t type_idx,
+                  const int64_t vector_idx,
+                  const int64_t row_count);
+
+  int add_snap_index(float *vectors, int64_t *vids, int num);
+
+  // Query Processor first
+  int check_delta_buffer_table_readnext_status(ObVectorQueryAdaptorResultContext *ctx,
+                                               common::ObNewRowIterator *row_iter,
+                                               SCN query_scn);
+  int complete_delta_buffer_table_data(ObVectorQueryAdaptorResultContext *ctx);
+  // Query Processor second
+  int check_index_id_table_readnext_status(ObVectorQueryAdaptorResultContext *ctx,
+                                           common::ObNewRowIterator *row_iter,
+                                           SCN query_scn);
+  // Query Processor third
+  int check_snapshot_table_wait_status(ObVectorQueryAdaptorResultContext *ctx);
+
+  int query_result(ObVectorQueryAdaptorResultContext *ctx,
+                   ObVectorQueryConditions *query_cond,
+                   ObVectorQueryVidIterator *&vids_iter);
+  static int param_deserialize(char *ptr, int32_t length,
+                               ObIAllocator *allocator,
+                               ObVectorIndexAlgorithmType &type,
+                               void *&param);
+  static int cast_roaringbitmap_to_stdmap(const roaring::api::roaring64_bitmap_t *bitmap,
+                                          std::map &mymap);
+  int check_vsag_mem_used();
+  uint64_t get_all_vsag_mem_used() {
+    return ATOMIC_LOAD(all_vsag_use_mem_);
+  }
+  int get_incr_vsag_mem_used();
+  int get_incr_vsag_mem_hold();
+  int get_snap_vsag_mem_used();
+  int get_snap_vsag_mem_hold();
+  ObIAllocator *get_allocator() { return allocator_; }
+
+  void *get_algo_data() { return algo_data_; }
+
+
+  int complete_index_mem_data(SCN read_scn,
+                              common::ObNewRowIterator *row_iter,
+                              blocksstable::ObDatumRow *last_row,
+                              ObArray &i_vids);
+  int prepare_delta_mem_data(roaring::api::roaring64_bitmap_t *gene_bitmap,
+                             ObArray &i_vids,
+                             ObVectorQueryAdaptorResultContext *ctx);
+  int serialize(ObIAllocator *allocator, ObOStreamBuf::CbParam &cb_param, ObOStreamBuf::Callback &cb);
+  int complete_delta_mem_data(roaring::api::roaring64_bitmap_t *gene_bitmap,
+                              roaring::api::roaring64_bitmap_t *delta_bitmap,
+                              ObIAllocator *allocator);
+
+  int check_need_sync_to_follower(bool &need_sync);
+
+  void sync_finish() { follower_sync_statistics_.sync_count_++; }
+  void sync_fail() { follower_sync_statistics_.sync_fail_++; }
+
+  void inc_sync_idle_count() { follower_sync_statistics_.idle_count_++; }
+  void reset_sync_idle_count() { follower_sync_statistics_.idle_count_ = 0;}
+  int64_t get_sync_idle_count() { return follower_sync_statistics_.idle_count_; }
+
+  int init_mem(ObVectorIndexMemData *&table_info);
+  int init_mem_data(ObVectorIndexRecordType type);
+  bool is_mem_data_init_atomic(ObVectorIndexRecordType type);
+  // Initializes the mem data of `type` unless it is already initialized.
+  int try_init_mem_data(ObVectorIndexRecordType type) {
+    int ret = OB_SUCCESS;
+    if (!is_mem_data_init_atomic(type)) {
+      ret = init_mem_data(type);
+    }
+    return ret;
+  }
+
+  ObAdapterCreateType &get_create_type() { return create_type_; };
+  void set_create_type(ObAdapterCreateType type) { create_type_ = type; };
+
+  TO_STRING_KV(K_(create_type), K_(type), KP_(algo_data), KP_(incr_data), KP_(snap_data), KP_(vbitmap_data),
+               K_(data_tablet_id),K_(rowkey_vid_tablet_id), K_(vid_rowkey_tablet_id),
+               K_(inc_tablet_id), K_(vbitmap_tablet_id), K_(snapshot_tablet_id),
+               K_(data_table_id), K_(rowkey_vid_table_id), K_(vid_rowkey_table_id),
+               K_(inc_table_id), K_(vbitmap_table_id), K_(snapshot_table_id),
+               K_(ref_cnt), K_(idle_cnt), KP_(allocator),
+               K_(index_identity), K_(follower_sync_statistics));
+
+private:
+  void *get_incr_index();
+  void *get_snap_index();
+  int add_datum_row_into_array(blocksstable::ObDatumRow *datum_row,
+                               ObArray &i_vids,
+                               ObArray &d_vids);
+  bool check_if_complete_index(SCN read_scn);
+  bool check_if_complete_delta(roaring::api::roaring64_bitmap_t *gene_bitmap);
+  int write_into_delta_mem(ObVectorQueryAdaptorResultContext *ctx, int count, float *vectors, uint64_t *vids);
+  int write_into_index_mem(int64_t dim, SCN read_scn,
+                           ObArray &i_vids,
+                           ObArray &d_vids);
+  int generate_snapshot_valid_bitmap(ObVectorQueryAdaptorResultContext *ctx,
+                                     common::ObNewRowIterator *row_iter,
+                                     SCN query_scn);
+
+  void output_bitmap(roaring::api::roaring64_bitmap_t *bitmap);
+
+  int merge_mem_data_(ObVectorIndexRecordType type,
+                      ObPluginVectorIndexAdaptor *partial_idx_adpt,
+                      ObVectorIndexMemData *&src_mem_data,
+                      ObVectorIndexMemData *&dst_mem_data);
+  int merge_and_generate_bitmap(ObVectorQueryAdaptorResultContext *ctx,
+                                roaring::api::roaring64_bitmap_t *&ibitmap,
+                                roaring::api::roaring64_bitmap_t *&dbitmap);
+
+  int vsag_query_vids(ObVectorQueryAdaptorResultContext *ctx,
+                      ObVectorQueryConditions *query_cond,
+                      int64_t dim, float *query_vector,
+                      ObVectorQueryVidIterator *&vids_iter);
+
+private:
+  ObAdapterCreateType create_type_;
+  ObVectorIndexAlgorithmType type_;
+  void *algo_data_;
+  ObVectorIndexMemData *incr_data_;
+  ObVectorIndexMemData *snap_data_;
+  ObVectorIndexMemData *vbitmap_data_;
+
+  ObTabletID snapshot_tablet_id_;
+  ObTabletID inc_tablet_id_;
+  ObTabletID vbitmap_tablet_id_;
+  ObTabletID data_tablet_id_;
+  ObTabletID rowkey_vid_tablet_id_;
+  ObTabletID vid_rowkey_tablet_id_;
+
+  uint64_t inc_table_id_;
+  uint64_t vbitmap_table_id_;
+  uint64_t snapshot_table_id_;
+  uint64_t data_table_id_;
+  uint64_t rowkey_vid_table_id_;
+  uint64_t vid_rowkey_table_id_;
+
+  int64_t ref_cnt_;
+  int64_t idle_cnt_; // not merged cnt
+  int64_t mem_check_cnt_;
+  uint64_t *all_vsag_use_mem_;
+  ObIAllocator *allocator_; // allocator for alloc adapter self
+  lib::MemoryContext &parent_mem_ctx_;
+  ObString index_identity_; // identify multi indexes on one table & column, generate unique uint64 to save memory?
+
+  // statistics for judging whether need sync follower
+  ObVectorIndexFollowerSyncStatic follower_sync_statistics_;
+
+  constexpr static uint32_t VEC_INDEX_INCR_DATA_SYNC_THRESHOLD = 100;
+  constexpr static uint32_t VEC_INDEX_VBITMAP_SYNC_THRESHOLD = 100;
+  constexpr static uint32_t VEC_INDEX_SNAP_DATA_SYNC_THRESHOLD = 1;
+  constexpr static uint32_t VEC_INDEX_ADAPTER_MAX_IDLE_COUNT = 3;
+  // names indexed by ObVectorIndexDistAlgorithm
+  constexpr const static char* const VEC_INDEX_ALGTH[ObVectorIndexDistAlgorithm::VIDA_MAX] = {
+    "l2",
+    "ip",
+    "cos",
+  };
+};
+
+// Scoped holder of one adapter reference.
+// set_adapter() takes a reference; the destructor drops it and, when that was the
+// last one, destroys the adapter and frees it through the adapter's own allocator.
+// NOTE(review): the guard is copyable by default and a copy would drop the reference
+// a second time — presumably callers never copy it; confirm before relying on that.
+class ObPluginVectorIndexAdapterGuard
+{
+public:
+  ObPluginVectorIndexAdapterGuard(ObPluginVectorIndexAdaptor *adapter = nullptr)
+    : adapter_(adapter)
+  {}
+  ~ObPluginVectorIndexAdapterGuard()
+  {
+    if (is_valid()) {
+      if (adapter_->dec_ref_and_check_release()) {
+        ObIAllocator *allocator = adapter_->get_allocator();
+        if (OB_ISNULL(allocator)) {
+          const int ret = OB_ERR_UNEXPECTED;
+          OB_LOG(WARN, "null allocator", KPC(adapter_));
+        } else {
+          OB_LOG(INFO, "adatper released", KPC(adapter_), K(lbt()));
+          adapter_->~ObPluginVectorIndexAdaptor();
+          allocator->free(adapter_);
+        }
+      }
+      adapter_ = nullptr;
+    }
+  }
+
+  bool is_valid() { return adapter_ != nullptr; }
+  ObPluginVectorIndexAdaptor* get_adatper() { return adapter_; }
+  // Attach an adapter and take a reference on it.
+  // Fails with OB_ERR_UNEXPECTED when the guard already holds an adapter and with
+  // OB_INVALID_ARGUMENT when `adapter` is null (previously a null pointer dereference).
+  int set_adapter(ObPluginVectorIndexAdaptor *adapter)
+  {
+    int ret = OB_SUCCESS;
+    if (is_valid()) {
+      ret = OB_ERR_UNEXPECTED;
+      OB_LOG(WARN, "vector index adapter guard can only set once", KPC(adapter_), KPC(adapter));
+    } else if (OB_ISNULL(adapter)) {
+      ret = OB_INVALID_ARGUMENT;
+      OB_LOG(WARN, "vector index adapter is null", K(ret));
+    } else {
+      adapter_ = adapter;
+      (void)adapter_->inc_ref();
+    }
+    return ret;
+  }
+  TO_STRING_KV(KPC_(adapter));
+
+private:
+  ObPluginVectorIndexAdaptor *adapter_;
+};
+
+// vsag::Allocator implementation backed by an OceanBase memory context.
+// Allocations are counted in the shared counter that all_vsag_use_mem_ points to.
+class ObVsagMemContext : public vsag::Allocator
+{
+public:
+  ObVsagMemContext(uint64_t *all_vsag_use_mem)
+    : all_vsag_use_mem_(all_vsag_use_mem),
+      mem_context_(nullptr) {};
+  ~ObVsagMemContext() {
+    if (mem_context_ != nullptr) {
+      DESTROY_CONTEXT(mem_context_);
+      mem_context_ = nullptr;
+      all_vsag_use_mem_ = nullptr;
+    }
+  }
+  int init(lib::MemoryContext &parent_mem_context, uint64_t *all_vsag_use_mem);
+  bool is_inited() { return OB_NOT_NULL(mem_context_); }
+
+  std::string Name() override {
+    return "ObVsagAlloc";
+  }
+  void* Allocate(size_t size) override;
+
+  void Deallocate(void* p) override;
+
+  void* Reallocate(void* p, size_t size) override;
+
+  // hold() of the underlying memory context; 0 while the context is not created.
+  int64_t hold() {
+    return is_inited() ? mem_context_->hold() : 0;
+  }
+
+  // used() of the underlying memory context; 0 while the context is not created.
+  int64_t used() {
+    return is_inited() ? mem_context_->used() : 0;
+  }
+
+private:
+  uint64_t *all_vsag_use_mem_;
+  lib::MemoryContext mem_context_;
+  // size of the per-block header that stores the block's total allocated size
+  constexpr static int64_t MEM_PTR_HEAD_SIZE = sizeof(int64_t);
+};
+
+};
+};
+#endif // OCEANBASE_SHARE_PLUGIN_VECTOR_INDEX_ADAPTOR_H_
\ No newline at end of file
diff --git a/src/share/vector_index/ob_plugin_vector_index_scheduler.cpp b/src/share/vector_index/ob_plugin_vector_index_scheduler.cpp
new file mode 100644
index 0000000000..57811f3b2f
--- /dev/null
+++ b/src/share/vector_index/ob_plugin_vector_index_scheduler.cpp
@@ -0,0 +1,1471 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+#define USING_LOG_PREFIX SERVER
+#include "share/vector_index/ob_plugin_vector_index_scheduler.h"
+#include "share/vector_index/ob_plugin_vector_index_service.h"
+#include "share/vector_index/ob_plugin_vector_index_utils.h"
+#include "share/vector_index/ob_vector_index_util.h"
+#include "share/table/ob_ttl_util.h"
+#include "share/scheduler/ob_dag_warning_history_mgr.h"
+#include "storage/ls/ob_ls.h"
+#include "storage/tx_storage/ob_ls_service.h"
+
+namespace oceanbase
+{
+namespace share
+{
+
+// Bind the scheduler to a tenant/LS and register it as a repeating timer task on
+// ttl_timer_tg_id. Fails with OB_ERR_UNEXPECTED when the tenant vector index service
+// or `ls` is null, or when ttl_timer_tg_id is 0.
+int ObPluginVectorIndexLoadScheduler::init(uint64_t tenant_id, ObLS *ls, int ttl_timer_tg_id)
+{
+  int ret = OB_SUCCESS;
+  ObPluginVectorIndexService *vector_index_service = MTL(ObPluginVectorIndexService *);
+  if (OB_ISNULL(vector_index_service) || OB_ISNULL(ls) || ttl_timer_tg_id == 0) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("tenant vector index load task fail",
+             KP(vector_index_service), KP(ls), K(ttl_timer_tg_id), KR(ret));
+  } else {
+    vector_index_service_ = vector_index_service;
+    ls_ = ls;
+    tenant_id_ = tenant_id;
+    interval_factor_ = 1;
+    is_inited_ = true;
+    ttl_tablet_timer_tg_id_ = ttl_timer_tg_id;
+    basic_period_ = VEC_INDEX_SCHEDULAR_BASIC_PERIOD;
+    if (OB_FAIL(TG_SCHEDULE(ttl_timer_tg_id, *this, basic_period_, true))) {
+      LOG_WARN("fail to schedule periodic task", KR(ret), K(ttl_timer_tg_id));
+    }
+  }
+  return ret;
+}
+
+// Timer entry point: sets up the trace id and a time guard, then calls run_task().
+void ObPluginVectorIndexLoadScheduler::runTimerTask()
+{
+  ObCurTraceId::init(GCONF.self_addr_);
+  ObTimeGuard guard("ObPluginVectorIndexLoadScheduler::runTimerTask",
+                    VEC_INDEX_LOAD_TIME_NORMAL_THRESHOLD);
+  run_task();
+}
+
+// Remove adapters of this LS that are no longer needed:
+//  - complete adapters whose vbitmap table schema is gone or in the recyclebin;
+//  - partial adapters whose tablet no longer exists in this LS, or that stayed
+//    unmerged for more than the allowed number of loops (is_deprecated()).
+// Errors are logged only; nothing is returned to the caller.
+void ObPluginVectorIndexLoadScheduler::clean_deprecated_adapters()
+{
+  int ret = OB_SUCCESS;
+  ObSEArray delete_tablet_id_array;
+  delete_tablet_id_array.reset();
+
+  ObPluginVectorIndexMgr *index_ls_mgr = nullptr;
+  if (OB_FAIL(vector_index_service_->get_ls_index_mgr_map().get_refactored(ls_->get_ls_id(), index_ls_mgr))) {
+    if (OB_HASH_NOT_EXIST == ret) {
+      ret = OB_SUCCESS;
+    } else {
+      LOG_WARN("fail to get vector index ls mgr", KR(ret), K(tenant_id_), K(ls_->get_ls_id()));
+    }
+  } else if (OB_ISNULL(index_ls_mgr)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get invalid vector index ls mgr", KR(ret), K(tenant_id_), K(ls_->get_ls_id()));
+  }
+
+  if (OB_SUCC(ret) && OB_NOT_NULL(index_ls_mgr)) {
+    FOREACH_X(iter, index_ls_mgr->get_complete_adapter_map(), OB_SUCC(ret)) {
+      ObPluginVectorIndexAdaptor *adapter = iter->second;
+      ObSchemaGetterGuard schema_guard;
+      const ObTableSchema *table_schema = nullptr;
+      if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard(tenant_id_, schema_guard))) {
+        LOG_WARN("fail to get schema guard", KR(ret), K(tenant_id_));
+      } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, adapter->get_vbitmap_table_id(), table_schema))) {
+        LOG_WARN("failed to get simple schema", KR(ret), K(tenant_id_), K(adapter->get_vbitmap_table_id()));
+      } else if (OB_ISNULL(table_schema) || table_schema->is_in_recyclebin()) {
+        // remove adapter if tablet not exist or is in recyclebin
+        if (OB_FAIL(delete_tablet_id_array.push_back(adapter->get_inc_tablet_id()))) {
+          LOG_WARN("push back table id failed",
+              K(delete_tablet_id_array.count()), K(adapter->get_inc_tablet_id()), KR(ret));
+        } else if (OB_FAIL(delete_tablet_id_array.push_back(adapter->get_vbitmap_tablet_id()))) {
+          LOG_WARN("push back table id failed",
+              K(delete_tablet_id_array.count()), K(adapter->get_vbitmap_tablet_id()), KR(ret));
+        } else if (OB_FAIL(delete_tablet_id_array.push_back(adapter->get_snap_tablet_id()))) {
+          LOG_WARN("push back table id failed",
+              K(delete_tablet_id_array.count()), K(adapter->get_snap_tablet_id()), KR(ret));
+        }
+      }
+    }
+
+    LOG_INFO("try erase complete vector index adapter",
+        K(index_ls_mgr->get_ls_id()), K(delete_tablet_id_array.count())); // debug, remove later
+
+    for (int64_t i = 0; OB_SUCC(ret) && i < delete_tablet_id_array.count(); i++) {
+      if (OB_FAIL(index_ls_mgr->erase_complete_adapter(delete_tablet_id_array.at(i)))) {
+        if (ret != OB_HASH_NOT_EXIST) {
+          LOG_WARN("failed to erase full vector index adapter",
+              K(index_ls_mgr->get_ls_id()), K(delete_tablet_id_array.at(i)), KR(ret));
+        } else { // already removed
+          ret = OB_SUCCESS;
+        }
+      }
+    }
+
+    delete_tablet_id_array.reset();
+
+    FOREACH_X(iter, index_ls_mgr->get_partial_adapter_map(), OB_SUCC(ret)) {
+      ObPluginVectorIndexAdaptor *adapter = iter->second;
+      ObTabletID tablet_id = iter->first;
+      ObTabletHandle tablet_handle;
+      if (OB_FAIL(ls_->get_tablet_svr()->get_tablet(tablet_id, tablet_handle))) {
+        if (OB_TABLET_NOT_EXIST != ret) {
+          LOG_WARN("fail to get tablet", K(ret), K(tablet_id));
+        } else {
+          ret = OB_SUCCESS; // not found, moved from this ls
+          if (OB_FAIL(delete_tablet_id_array.push_back(tablet_id))) {
+            // log the id that was actually pushed (the map key), not the adapter's inc tablet id
+            LOG_WARN("push back table id failed",
+                K(delete_tablet_id_array.count()), K(tablet_id), KR(ret));
+          }
+        }
+      } else {
+        // tablet exist, but it may in recyclebin, cannot check schema if it is partial adapter from dml
+        // add count here if more then 3 loops not merged, remove them.
+        adapter->inc_idle();
+        if (adapter->is_deprecated()) {
+          if (OB_FAIL(delete_tablet_id_array.push_back(tablet_id))) {
+            LOG_WARN("push back table id failed",
+                K(delete_tablet_id_array.count()), K(tablet_id), KR(ret));
+          }
+        }
+      }
+    }
+
+    LOG_INFO("try erase partial vector index adapter",
+        K(index_ls_mgr->get_ls_id()), K(delete_tablet_id_array.count())); // debug, remove later
+
+    for (int64_t i = 0; OB_SUCC(ret) && i < delete_tablet_id_array.count(); i++) {
+      if (OB_FAIL(index_ls_mgr->erase_partial_adapter(delete_tablet_id_array.at(i)))) {
+        if (ret != OB_HASH_NOT_EXIST) {
+          LOG_WARN("failed to erase partial vector index adapter",
+              K(index_ls_mgr->get_ls_id()), K(delete_tablet_id_array.at(i)), KR(ret));
+        } else { // already removed
+          ret = OB_SUCCESS;
+        }
+      }
+    }
+
+    delete_tablet_id_array.reset();
+  }
+}
+
+// Whether the scheduler may work for this tenant.
+// Returns false for oracle-mode tenants, for data versions below 4.3.3.0 (the user
+// tenant and, for user tenants, its meta tenant) and whenever one of these checks
+// itself fails.
+bool ObPluginVectorIndexLoadScheduler::check_can_do_work()
+{
+  bool bret = true;
+  int ret = OB_SUCCESS;
+  int64_t tenant_id = MTL_ID();
+  uint64_t tenant_data_version = 0;
+  bool is_oracle_mode = false;
+
+  if (OB_FAIL(ObCompatModeGetter::check_is_oracle_mode_with_tenant_id(tenant_id_, is_oracle_mode))) {
+    // the compat mode is unknown: do not work, same as the other failed checks below
+    bret = false;
+    LOG_WARN("fail to check oracle mode", KR(ret), K_(tenant_id));
+  } else if (is_oracle_mode) {
+    bret = false;
+    LOG_DEBUG("vector index not support oracle mode", K(tenant_id));
+  } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) {
+    bret = false;
+    LOG_WARN("get tenant data version failed", K(ret));
+  } else if (tenant_data_version < DATA_VERSION_4_3_3_0) {
+    bret = false;
+    LOG_DEBUG("vector index can not work with data version less than 4_3_3", K(tenant_data_version));
+  } else if (is_user_tenant(tenant_id)) {
+    if (OB_FAIL(GET_MIN_DATA_VERSION(gen_meta_tenant_id(tenant_id), tenant_data_version))) {
+      bret = false;
+      LOG_WARN("get tenant data version failed", K(ret));
+    } else if (tenant_data_version < DATA_VERSION_4_3_3_0) {
+      bret = false;
+      LOG_DEBUG("vector index can not work with data version less than 4_3_3", K(tenant_data_version));
+    }
+  }
+  return bret;
+}
+
+// Compare the tenant schema version with the locally remembered one.
+// Returns OB_EAGAIN while the schema version is not a formal version; when the
+// version advanced, remembers it and marks the tenant as needing a check.
+int ObPluginVectorIndexLoadScheduler::check_schema_version()
+{
+  int ret = OB_SUCCESS;
+  ObSchemaGetterGuard schema_guard;
+  int64_t schema_version = 0;
+  if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard(tenant_id_, schema_guard))) {
+    LOG_WARN("fail to get schema guard", K(ret), K_(tenant_id));
+  } else if (OB_FAIL(schema_guard.get_schema_version(tenant_id_, schema_version))) {
+    LOG_WARN("fail to get tenant schema version", K(ret), K_(tenant_id));
+  } else if (!ObSchemaService::is_formal_version(schema_version)) {
+    ret = OB_EAGAIN;
+    LOG_INFO("is not a formal_schema_version", KR(ret), K(schema_version));
+  } else if (local_schema_version_ == OB_INVALID_VERSION || local_schema_version_ < schema_version) {
+    FLOG_INFO("schema changed", KR(ret), K_(local_schema_version), K(schema_version));
+    local_schema_version_ = schema_version;
+    mark_tenant_need_check();
+  }
+  return ret;
+}
+
+// Mark the tenant as needing a check when the manager still holds partial adapters.
+// Returns OB_INVALID_ARGUMENT for a null manager.
+int ObPluginVectorIndexLoadScheduler::check_parital_index_adpter_exist(ObPluginVectorIndexMgr *mgr)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(mgr)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("vector index mgr is null", KR(ret));
+  } else if (!mgr->get_partial_adapter_map().empty()) {
+    mark_tenant_need_check();
+  }
+  return ret;
+}
+
+// Set need_check_ on the local tenant task, provided tenant tasks may be processed.
+void ObPluginVectorIndexLoadScheduler::mark_tenant_need_check()
+{
+  int ret = OB_SUCCESS;
+  if (common::ObTTLUtil::check_can_process_tenant_tasks(tenant_id_)) {
+    local_tenant_task_.need_check_ = true;
+    FLOG_INFO("finish mark tenant need check", K(local_tenant_task_));
+  }
+  LOG_DEBUG("finsh mark tenant need check", KR(ret), K(local_tenant_task_.need_check_));
+}
+
+// Classify an index table schema that is not in the recyclebin:
+//  - is_vector_index_table: delta buffer / index id / index snapshot data table;
+//  - is_shared_index_table: rowkey-vid / vid-rowkey table.
+// Both outputs are false for any other table. Always returns OB_SUCCESS.
+int ObPluginVectorIndexLoadScheduler::check_is_vector_index_table(const ObTableSchema &table_schema,
+                                                                  bool &is_vector_index_table,
+                                                                  bool &is_shared_index_table)
+{
+  int ret = OB_SUCCESS;
+  is_vector_index_table = false;
+  is_shared_index_table = false;
+  if (table_schema.is_index_table() && !table_schema.is_in_recyclebin()) {
+    if (table_schema.is_vec_delta_buffer_type()
+        || table_schema.is_vec_index_id_type()
+        || table_schema.is_vec_index_snapshot_data_type()) {
+      is_vector_index_table = true;
+    } else if (table_schema.is_vec_rowkey_vid_type()
+               || table_schema.is_vec_vid_rowkey_type()) {
+      is_shared_index_table = true;
+    }
+  }
+  return ret;
+}
+
+void ObPluginVectorIndexLoadScheduler::mark_tenant_checked()
+{
+  local_tenant_task_.need_check_ = false;
+}
+
+// For every tablet of the vector index table that lives in this LS, acquire (or
+// create) its adapter and, unless the adapter is already complete, fill in the table
+// id, the data tablet id and the index identity and reset its idle counter.
+// Tablets that do not exist in this LS are skipped.
+int ObPluginVectorIndexLoadScheduler::acquire_adapter_in_maintenance(const int64_t table_id,
+                                                                     const ObTableSchema *table_schema)
+{
+  int ret = OB_SUCCESS;
+  ObArray tablet_ids;
+
+  // validate the pointers before anything dereferences them
+  if (OB_ISNULL(ls_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("ls is null", KR(ret));
+  } else if (OB_ISNULL(table_schema)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("table schema is null", KR(ret), K(table_id));
+  } else if (OB_FAIL(table_schema->get_tablet_ids(tablet_ids))) {
+    LOG_WARN("fail to get tablet ids", KR(ret), K(table_id));
+  } else {
+    ObIndexType index_type = table_schema->get_index_type();
+    ObLSID ls_id = ls_->get_ls_id();
+    ObTabletHandle tablet_handle;
+    for (int64_t i = 0; OB_SUCC(ret) && i < tablet_ids.count(); i++) {
+      if (OB_FAIL(ls_->get_tablet_svr()->get_tablet(tablet_ids.at(i), tablet_handle))) {
+        if (OB_TABLET_NOT_EXIST != ret) {
+          LOG_WARN("fail to get tablet", K(ret), K(tablet_ids.at(i)));
+        } else {
+          ret = OB_SUCCESS; // not found, continue loop
+        }
+      } else {
+        ObPluginVectorIndexAdapterGuard adapter_guard;
+        ObString index_identity;
+        // Notice: only no.3 aux table has vec_idx_params
+        ObString vec_idx_params = table_schema->get_index_params();
+        int64_t dim = 0;
+        if (OB_FAIL(ObVectorIndexUtil::get_vector_index_column_dim(*table_schema, dim))) {
+          LOG_WARN("fail to get vec_index_col_param", K(ret));
+        } else if (OB_FAIL(vector_index_service_->acquire_adapter_guard(ls_id,
+                                                                        tablet_ids.at(i),
+                                                                        index_type,
+                                                                        adapter_guard,
+                                                                        &vec_idx_params,
+                                                                        dim))) {
+          LOG_WARN("fail to acquire adapter gurad", K(ret), K(ls_id));
+        } else if (OB_ISNULL(adapter_guard.get_adatper())) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("acquired adapter is null", K(ret), K(ls_id), K(tablet_ids.at(i)));
+        } else if (adapter_guard.get_adatper()->is_complete()) { // get create type ?
+          // already exist full adapter, bypass
+        } else if (OB_FAIL(adapter_guard.get_adatper()->
+              set_table_id(ObPluginVectorIndexUtils::index_type_to_record_type(index_type), table_id))) {
+          LOG_WARN("fail to set table id", K(ret), K(ls_id), K(tablet_ids.at(i)));
+        } else if (OB_FAIL(adapter_guard.get_adatper()->
+              set_tablet_id(VIRT_DATA, tablet_handle.get_obj()->get_data_tablet_id()))) {
+          LOG_WARN("fail to fill partial index adapter info",
+            K(ret), K(ls_id), K(tablet_ids.at(i)), K(tablet_handle.get_obj()->get_data_tablet_id()));
+        } else if (OB_FAIL(ObPluginVectorIndexUtils::get_vector_index_prefix(*table_schema,
+                                                                             index_identity))) {
+          LOG_WARN("fail to get index identity", KR(ret));
+        } else if (OB_FAIL(adapter_guard.get_adatper()->set_index_identity(index_identity))) {
+          LOG_WARN("fail to set index identity", KR(ret), KPC(adapter_guard.get_adatper()));
+        } else {
+          adapter_guard.get_adatper()->reset_idle();
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+// Record, per data tablet, the table/tablet ids of a shared (rowkey-vid or vid-rowkey)
+// index table in shared_table_info_map.
+// The first table seen for a data tablet creates the entry; the second one completes
+// it, and the completed entry must pass is_valid() before it overwrites the old one.
+// Tablets that do not exist in this LS are skipped.
+int ObPluginVectorIndexLoadScheduler::set_shared_table_info_in_maintenance(
+    const int64_t table_id,
+    const ObTableSchema *table_schema,
+    ObVecIdxSharedTableInfoMap &shared_table_info_map)
+{
+  int ret = OB_SUCCESS;
+  ObArray tablet_ids;
+
+  if (OB_ISNULL(ls_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("ls is null", KR(ret));
+  } else if (OB_ISNULL(table_schema)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("table schema is null", KR(ret), K(table_id));
+  } else if (OB_FAIL(table_schema->get_tablet_ids(tablet_ids))) {
+    LOG_WARN("fail to get tablet ids", KR(ret), K(table_id));
+  } else {
+    ObIndexType index_type = table_schema->get_index_type();
+    ObTabletHandle tablet_handle;
+    ObTabletID data_tablet_id;
+    for (int64_t i = 0; OB_SUCC(ret) && i < tablet_ids.count(); i++) {
+      // Fresh entry per tablet: when the map has no entry for this data tablet, the
+      // new entry must not inherit ids left over from the previous iteration.
+      ObVectorIndexSharedTableInfo info;
+      if (OB_FAIL(ls_->get_tablet_svr()->get_tablet(tablet_ids.at(i), tablet_handle))) {
+        if (OB_TABLET_NOT_EXIST != ret) {
+          LOG_WARN("fail to get tablet", K(ret), K(tablet_ids.at(i)));
+        } else {
+          ret = OB_SUCCESS; // not found, continue loop
+        }
+      } else if (FALSE_IT(data_tablet_id = tablet_handle.get_obj()->get_data_tablet_id())) {
+      } else if (OB_FAIL(shared_table_info_map.get_refactored(data_tablet_id, info))) {
+        if (OB_HASH_NOT_EXIST != ret) {
+          LOG_WARN("fail to get shared table info", K(ret), K(tablet_ids.at(i)));
+        } else { // OB_HASH_NOT_EXIST
+          if (index_type == INDEX_TYPE_VEC_ROWKEY_VID_LOCAL) {
+            info.rowkey_vid_table_id_ = table_id;
+            info.rowkey_vid_tablet_id_ = tablet_ids.at(i);
+          } else {
+            info.vid_rowkey_table_id_ = table_id;
+            info.vid_rowkey_tablet_id_ = tablet_ids.at(i);
+          }
+          info.data_table_id_ = table_schema->get_data_table_id();
+          if (OB_FAIL(shared_table_info_map.set_refactored(data_tablet_id, info))) {
+            LOG_WARN("fail to set shared table info", K(ret), K(data_tablet_id));
+          }
+        }
+      } else {
+        if (index_type == INDEX_TYPE_VEC_ROWKEY_VID_LOCAL) {
+          info.rowkey_vid_table_id_ = table_id;
+          info.rowkey_vid_tablet_id_ = tablet_ids.at(i);
+        } else {
+          info.vid_rowkey_table_id_ = table_id;
+          info.vid_rowkey_tablet_id_ = tablet_ids.at(i);
+        }
+        info.data_table_id_ = table_schema->get_data_table_id();
+        const int overwrite = 1;
+        if (!info.is_valid()) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("invalid shared table info", K(ret), K(info));
+        } else if (OB_FAIL(shared_table_info_map.set_refactored(data_tablet_id, info, overwrite))) {
+          LOG_WARN("fail to set shared table info", K(ret), K(data_tablet_id));
+        }
+      }
+    }
+  }
+
+  return ret;
+}
+
+
+// scan all vector tablet in current tenant/LS
+int ObPluginVectorIndexLoadScheduler::execute_adapter_maintenance()
+{
+  int ret = OB_SUCCESS;
+  ObTimeGuard guard("ObPluginVectorIndexLoadScheduler::check_and_generate_tablet_tasks",
+                    VEC_INDEX_LOAD_TIME_NORMAL_THRESHOLD);
+  const schema::ObTableSchema *table_schema = nullptr;
+  ObSEArray table_id_array;
+
+  ObVecIdxSharedTableInfoMap shared_table_info_map;
+  ObMemAttr memattr(MTL_ID(), "VecIdxInfo");
+  if (IS_NOT_INIT) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("tablet vector index scheduler not init", KR(ret));
+  } else {
+    clean_deprecated_adapters();
+  }
+
+  if (current_memory_config_ != 0) { // has memory for new adapter
+
+    if (OB_FAIL(ObTTLUtil::get_tenant_table_ids(tenant_id_,
table_id_array))) { + LOG_WARN("fail to get tenant table ids", KR(ret), K_(tenant_id)); + } else if (!table_id_array.empty() + && OB_FAIL(shared_table_info_map.create(DEFAULT_TABLE_ARRAY_SIZE, memattr, memattr))) { + LOG_WARN("fail to create param map", KR(ret)); + } + + int64_t start_idx = 0; + int64_t end_idx = 0; + + while (OB_SUCC(ret) && start_idx < table_id_array.count()) { + ObSchemaGetterGuard schema_guard; + start_idx = end_idx; + end_idx = MIN(table_id_array.count(), start_idx + TBALE_GENERATE_BATCH_SIZE); + + bool is_vector_index = false; + bool is_shared_index = false; + if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("fail to get schema guard", KR(ret), K_(tenant_id)); + } + + for (int64_t idx = start_idx; OB_SUCC(ret) && idx < end_idx; ++idx) { + const int64_t table_id = table_id_array.at(idx); + const ObTableSchema *table_schema = nullptr; + if (is_sys_table(table_id)) { + // do nothing + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, table_id, table_schema))) { + LOG_WARN("failed to get simple schema", KR(ret), K(table_id)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("table schema is null", KR(ret), K(table_id), K_(tenant_id)); + } else if (table_schema->is_in_recyclebin()) { + // do nothing + } else if (OB_FAIL(check_is_vector_index_table(*table_schema, is_vector_index, is_shared_index))) { + LOG_WARN("fail to check is vector index", KR(ret)); + } else if (is_vector_index + && OB_FAIL(acquire_adapter_in_maintenance(table_id, table_schema))) { + // for one vector_index table + LOG_WARN("fail to create adapter in maintenance", KR(ret), K(table_id)); + } else if (is_shared_index + && OB_FAIL(set_shared_table_info_in_maintenance(table_id, + table_schema, + shared_table_info_map))) { + LOG_WARN("fail to set shared table info", KR(ret), K(table_id)); + } + } + } + + if (OB_FAIL(ret)) { + } else if 
(OB_FAIL(vector_index_service_->check_and_merge_adapter(ls_->get_ls_id(), shared_table_info_map))) { + LOG_WARN("fail to merge parital adapter task", KR(ret)); + } else { + mark_tenant_checked(); + } + } + + LOG_INFO("finish generate tenant tablet tasks", KR(ret), K_(tenant_id)); + return ret; +} + +int ObPluginVectorIndexLoadScheduler::check_tenant_memory() +{ + // ToDo: + // 1. check vector index memory usage + // 2. check adaptor number limit if needed + // 3. set condition: if out of use, only do clean task + int ret = OB_SUCCESS; + if (OB_FAIL(ObPluginVectorIndexHelper::get_vector_memory_limit_size(tenant_id_, current_memory_config_))) { + LOG_WARN("failed to get vector mem limit size.", K(ret), K_(tenant_id)); + ret = OB_SUCCESS; + current_memory_config_ = 0; + } else { + LOG_INFO("get vector mem limit size", KR(ret), K_(tenant_id), K_(current_memory_config)); + } + return ret; +} + +int read_tenant_task_status(uint64_t tenant_id, + common::ObISQLClient *sql_client, + ObVectorIndexTenantStatus& tenant_task) +{ + int ret = OB_SUCCESS; + if (!is_valid_tenant_id(tenant_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id)); + } else { + tenant_task.tenant_id_ = tenant_id; + tenant_task.status_ = OB_RS_TTL_TASK_CREATE; + } + return ret; +} + +// 1. check if loading feature is allowed: +// read from sys table with tenant id, special table id & special tablet id, not implemented +// 2. 
check if need mem load task +// from log replay, or long time not processed +int ObPluginVectorIndexLoadScheduler::reload_tenant_task() +{ + int ret = OB_SUCCESS; + ObVectorIndexTenantStatus tenant_task; + ObVectorIndexTaskStatus expected_state; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObPluginVectorIndexLoadScheduler not init", KR(ret)); + } else if (OB_FAIL(read_tenant_task_status(tenant_id_, NULL, tenant_task))) { + LOG_WARN("fail to read vector index tenant task", KR(ret), K_(tenant_id)); + } else if (OB_RS_TTL_TASK_MOVE == static_cast(tenant_task.status_) || + OB_RS_TTL_TASK_CANCEL == static_cast(tenant_task.status_)) { + FLOG_INFO("tenant task is finish now, reuse local tenant task", + KR(ret), K_(local_tenant_task), K(tenant_task.task_id_)); + } else if (OB_FAIL(ObTTLUtil::transform_tenant_state(static_cast(tenant_task.status_), expected_state))) { + LOG_WARN("fail to transform vector index tenant task status", KR(ret), K(tenant_task.status_)); + } else if (expected_state != OB_TTL_TASK_RUNNING) { // currently, only running state expected + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid vector index tenant task status", + KR(ret), K(tenant_task.status_), K(expected_state), K(local_tenant_task_)); + } else { + ObPluginVectorIndexMgr *index_ls_mgr = nullptr; + if (OB_FAIL(vector_index_service_->get_ls_index_mgr_map().get_refactored(ls_->get_ls_id(), index_ls_mgr))) { + if (OB_HASH_NOT_EXIST == ret) { // do nothing + ret = OB_SUCCESS; + } else { + LOG_WARN("fail to get vector index ls mgr", KR(ret), + K(tenant_task.status_), K(tenant_id_), K(ls_->get_ls_id())); + } + } else if (OB_ISNULL(index_ls_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid vector index ls mgr", KR(ret), + K(tenant_task.status_), K(tenant_id_), K(ls_->get_ls_id())); + } else if (index_ls_mgr->get_ls_task_ctx().state_ != expected_state) { + if (expected_state != OB_TTL_TASK_RUNNING) { + FLOG_INFO("vector index schedular is not running now", KR(ret), 
K(index_ls_mgr->get_ls_task_ctx())); + } + // currently, only finish/running vs running + // if change from running to finish/cancel release context + index_ls_mgr->get_ls_task_ctx().reuse(); + index_ls_mgr->get_ls_task_ctx().task_id_++; // not used, ++ if overall task status changed + index_ls_mgr->get_ls_task_ctx().need_check_ = true; + // all finish + index_ls_mgr->get_ls_task_ctx().state_ = expected_state; + } + } + + return ret; +} + +int ObPluginVectorIndexLoadScheduler::execute_one_memdata_sync_task(ObPluginVectorIndexMgr *mgr, + ObPluginVectorIndexTaskCtx *task_ctx) +{ + int ret = OB_SUCCESS; + bool try_schedule = false; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObPluginVectorIndexLoadScheduler not init", KR(ret), K_(tenant_id)); + } else if (OB_ISNULL(mgr) || OB_ISNULL(task_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("vector index adapter or memdata load ctx is null", KR(ret), KPC(mgr), KPC(task_ctx)); + } else { + common::ObSpinLockGuard ctx_guard(task_ctx->lock_); + if (task_ctx->task_status_ != mgr->get_ls_task_ctx().state_) { + // only pending task could be changed to running, reuse ttl task status + if (OB_TTL_TASK_RUNNING == mgr->get_ls_task_ctx().state_) { + if (OB_TTL_TASK_PREPARE == task_ctx->task_status_) { + try_schedule = true; + } else if (OB_TTL_TASK_FINISH == task_ctx->task_status_ + || OB_TTL_TASK_CANCEL == task_ctx->task_status_) { + // do nothing + LOG_INFO("task finish or canceled", K(mgr->get_ls_task_ctx()), KPC(task_ctx)); // ToDo: change to debug level + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("no expected task status", KR(ret), K(mgr->get_ls_task_ctx()), KPC(task_ctx)); + } + } else { // ls not running + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect ls task status", KR(ret), KPC(mgr), KPC(task_ctx)); + } + } else { // if is running do nothing, if not need schedular. 
+ LOG_INFO("no need to schedule task", K(mgr->get_ls_task_ctx()), KPC(task_ctx)); // ToDo: change to debug level + } + + if (OB_SUCC(ret) + && try_schedule + && OB_FAIL(try_schedule_task(mgr, task_ctx))) { + if (OB_SIZE_OVERFLOW != ret) { + LOG_WARN("fail to try schedule dag task", KR(ret)); + } else { + ret = OB_SUCCESS; // size overflow schedule later + } + } + } + + return ret; +} + +int ObPluginVectorIndexLoadScheduler::try_schedule_task(ObPluginVectorIndexMgr *mgr, + ObPluginVectorIndexTaskCtx *task_ctx) +{ + // check and gen dag + int ret = OB_SUCCESS; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", KR(ret), K_(tenant_id)); + } else if (OB_ISNULL(task_ctx) || OB_ISNULL(mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("vector index adapter or memdata load ctx is null", KPC(mgr), KR(ret)); + } else if (can_schedule_tenant(mgr) && can_schedule_task(task_ctx)) { + if (OB_FAIL(generate_vec_idx_memdata_dag(mgr, task_ctx))) { + if (OB_EAGAIN == ret) { + ret = OB_SUCCESS; + } else if (OB_SIZE_OVERFLOW == ret) { // do nothing, handled by caller + } else { + LOG_WARN("fail to generate vector index memdata load dag task", KR(ret)); + } + } else { + inc_dag_ref(); + if (task_ctx->task_start_time_ == OB_INVALID_ID) { + task_ctx->task_start_time_ = ObTimeUtility::current_time(); + } + task_ctx->in_queue_ = true; + // dag maybe already finished, and set status to finish/cancel, + // but here change it to running and could not be scheduler later + task_ctx->task_status_ = OB_TTL_TASK_RUNNING; + } + } else { + LOG_DEBUG("status when try schedule task", KPC(mgr), K(task_ctx)); + } + + return OB_SUCCESS; +} + +int ObPluginVectorIndexLoadScheduler::try_schedule_remaining_tasks(ObPluginVectorIndexMgr *mgr, + ObPluginVectorIndexTaskCtx *current_ctx) +{ + int ret = OB_SUCCESS; + VectorIndexMemSyncMap ¤t_task_map = mgr->get_processing_map(); + FOREACH_X(iter, current_task_map, OB_SUCC(ret)) { + ObPluginVectorIndexTaskCtx *task_ctx = iter->second; + if 
(OB_ISNULL(task_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid task ctx", KR(ret), KPC(task_ctx)); + } else if (task_ctx == current_ctx) { + // bypass + } else { + common::ObSpinLockGuard ctx_guard(task_ctx->lock_); + if (can_schedule_task(task_ctx) && task_ctx->task_status_ == OB_TTL_TASK_PREPARE) { + LOG_INFO("try schedule remaining task", KPC(task_ctx), KPC(current_ctx)); + if (OB_FAIL(try_schedule_task(mgr, task_ctx))) { + if (OB_SIZE_OVERFLOW != ret) { + LOG_WARN("fail to try schedule dag task", KR(ret)); + } + } + } + } + } + + if (OB_SIZE_OVERFLOW == ret) { // task queue full, schedule later + ret = OB_SUCCESS; + } + + return ret; +} + +// reserved control funtions, remove if not used finally +bool ObPluginVectorIndexLoadScheduler::can_schedule_tenant(const ObPluginVectorIndexMgr *mgr) +{ + bool bret = true; + if (OB_ISNULL(mgr)) { + bret = false; + } + return bret; +} + +// reserved control funtions, remove if not used finally +bool ObPluginVectorIndexLoadScheduler::can_schedule_task(const ObPluginVectorIndexTaskCtx *task_ctx) +{ + bool bret = true; + if (OB_ISNULL(task_ctx)) { + bret = false; + } + return bret; +} + +int ObPluginVectorIndexLoadScheduler::generate_vec_idx_memdata_dag(ObPluginVectorIndexMgr *mgr, + ObPluginVectorIndexTaskCtx *task_ctx) +{ + int ret = OB_SUCCESS; + ObVectorIndexDag *dag = nullptr; + ObVectorIndexTask *memdata_sync_task = nullptr; + + ObTenantDagScheduler *dag_scheduler = nullptr; + if (OB_ISNULL(dag_scheduler = MTL(ObTenantDagScheduler *))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("dag scheduler must not be null", K(ret)); + } else if (OB_FAIL(dag_scheduler->alloc_dag(dag))) { + LOG_WARN("fail to alloc vector index memdata sync dag", KR(ret)); + } else if (OB_ISNULL(dag)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, vector index memdata sync dag is null", KR(ret), KP(dag)); + } else if (OB_FAIL(dag->init(mgr, task_ctx))) { + LOG_WARN("fail to init vector index memdata sync dag", KR(ret)); + } else if 
(OB_FAIL(dag->alloc_task(memdata_sync_task))) { + LOG_WARN("fail to alloc vector index memdata sync task", KR(ret)); + } else if (OB_ISNULL(memdata_sync_task)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, vector index memdata sync task is null", KR(ret), KP(memdata_sync_task)); + } else if (OB_FAIL(memdata_sync_task->init(this, mgr, task_ctx))) { + LOG_WARN("fail to init vector index memdata sync task", KR(ret)); + } else if (OB_FAIL(dag->add_task(*memdata_sync_task))) { + LOG_WARN("fail to add vector index memdata sync task", KR(ret)); + } else if (OB_FAIL(dag_scheduler->add_dag(dag))) { + // handle special ret code by caller + if (OB_EAGAIN == ret) { + LOG_INFO("vector index memdata sync dag already exists, no need to re-schedule", KR(ret)); + } else if (OB_SIZE_OVERFLOW == ret) { + LOG_INFO("dag scheduler is full", KR(ret)); + } else { + LOG_WARN("fail to add vector index memdata sync dag to queue", KR(ret)); + } + } else { + FLOG_INFO("build vector index memdata sync dag success", KR(ret), KPC(task_ctx)); + } + + if (OB_FAIL(ret) && OB_NOT_NULL(dag_scheduler) && OB_NOT_NULL(dag)) { + dag_scheduler->free_dag(*dag); + } + return ret; +} + +// call try_schedule_remaining_tasks inside +// if all task finish, reset process map +int ObPluginVectorIndexLoadScheduler::check_task_state(ObPluginVectorIndexMgr *mgr, + ObPluginVectorIndexTaskCtx *task_ctx, + bool &is_stop) +{ + int ret = OB_SUCCESS; + // stop current task + is_stop = true; + // do memsync task even if schema changed + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("vector index load scheduler not init", KR(ret), K_(tenant_id)); + } else if (OB_ISNULL(mgr) || OB_ISNULL(task_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("vector index adapter or memdata load ctx is null", KR(ret), KPC(mgr), KPC(task_ctx)); + } else { + common::ObSpinLockGuard ctx_guard(task_ctx->lock_); + // change log level to debug later + if (task_ctx->task_status_ == OB_TTL_TASK_CANCEL + || task_ctx->task_status_ == 
OB_TTL_TASK_FINISH) { + // do nothing, schedule next + LOG_INFO("cancel current memdata sync task", KR(ret), KPC(task_ctx)); + } else if (task_ctx->task_status_ == OB_TTL_TASK_RUNNING) { + // will schedule this + if (task_ctx->err_code_ == OB_SUCCESS) { + task_ctx->task_status_ = OB_TTL_TASK_FINISH; + LOG_INFO("current memdata sync task finish", KR(ret), KPC(task_ctx)); + // task success, schedule next + } else if (OB_PARTITION_NOT_EXIST == task_ctx->err_code_ + || OB_TABLE_NOT_EXIST == task_ctx->err_code_ + || OB_ERR_UNKNOWN_TABLE == task_ctx->err_code_ + || OB_LS_NOT_EXIST == task_ctx->err_code_ + || OB_TABLET_NOT_EXIST == task_ctx->err_code_ + || OB_REPLICA_NOT_READABLE == task_ctx->err_code_) { + LOG_INFO("cancel current memdata sync task since partition state change", KR(ret), KPC(task_ctx)); + task_ctx->task_status_ = OB_TTL_TASK_CANCEL; + // canceled, schedule next + } else if (OB_ALLOCATE_MEMORY_FAILED == task_ctx->err_code_ + || OB_ERR_VSAG_MEM_LIMIT_EXCEEDED == task_ctx->err_code_) { + LOG_WARN("cancel current memdata sync task since out of resources", KR(ret), KPC(task_ctx)); + task_ctx->task_status_ = OB_TTL_TASK_CANCEL; + } else { // retry + LOG_WARN("current memdata sync task report error, will retry", KR(ret), KPC(task_ctx)); + // task_ctx->task_status_ = OB_TTL_TASK_PREPARE; // still in running state + task_ctx->failure_times_++; + if (task_ctx->failure_times_ >= 3) { + task_ctx->task_status_ = OB_TTL_TASK_CANCEL; + LOG_WARN("current memdata sync task failed too many times, cancel it", KR(ret), KPC(task_ctx)); + } + is_stop = false; + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected task status", KR(ret), KPC(task_ctx)); + } + } + + // current task stopped, schedule remaining tasks + if (is_stop && OB_SUCC(ret)) { + LOG_INFO("stop current memdata sync task", KR(ret), KPC(task_ctx)); + if (OB_FAIL(try_schedule_remaining_tasks(mgr, task_ctx))) { + LOG_WARN("fail to schedule remaining tasks", KR(ret)); + } + } + return ret; +} + +int 
ObPluginVectorIndexLoadScheduler::check_ls_task_state(ObPluginVectorIndexMgr *mgr) +{ + int ret = OB_SUCCESS; + uint32_t finished_task_count = 0; + VectorIndexMemSyncMap ¤t_task_map = mgr->get_processing_map(); + FOREACH(iter, current_task_map) { + ObPluginVectorIndexTaskCtx *ctx = iter->second; + if (OB_ISNULL(ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null memdta_ctx", K(tenant_id_), KPC(ctx)); + } else if (ctx->task_status_ == OB_TTL_TASK_FINISH // need a waiting state, maybe false finish + || ctx->task_status_ == OB_TTL_TASK_CANCEL) { + finished_task_count++; + } + } + + if (finished_task_count > 0) { + if (finished_task_count == current_task_map.size()) { + // all task finished or canceled + ObPluginVectorIndexLSTaskCtx &ls_task_ctx = mgr->get_ls_task_ctx(); + ls_task_ctx.all_finished_ = true; + ls_task_ctx.need_memdata_sync_ = false; + LOG_INFO("all memdata sync task finished", + K(tenant_id_), K(ls_->get_ls_id()), K(finished_task_count)); + } else { + LOG_INFO("memdata sync task remaining", + K(tenant_id_), K(ls_->get_ls_id()), K(finished_task_count), K(current_task_map.size())); + } + } + + return ret; +} + +int ObPluginVectorIndexLoadScheduler::check_and_execute_adapter_maintenance_task(ObPluginVectorIndexMgr *&mgr) +{ + int ret = OB_SUCCESS; + bool need_check = false; + bool is_dirty = false; + bool is_finished = false; + // if schema version change, or exist partial adapter(create by access) need do maintenance + if (OB_FAIL(check_schema_version())) { + LOG_WARN("fail to check schema version", KR(ret)); + } else if (OB_NOT_NULL(mgr) && OB_FAIL(check_parital_index_adpter_exist(mgr))) { + LOG_WARN("fail to check exist paritial index adapter", KR(ret)); + } else if (local_tenant_task_.need_check_) { + if (OB_FAIL(execute_adapter_maintenance())) { + LOG_WARN("fail to generate tablet tasks", K_(tenant_id)); + } + int tmp_ret = OB_SUCCESS; + if (OB_ISNULL(mgr)) { + tmp_ret = vector_index_service_->get_ls_index_mgr_map().get_refactored(ls_->get_ls_id(), 
mgr); + if (tmp_ret == OB_SUCCESS) { + } else if (tmp_ret == OB_HASH_NOT_EXIST) { + tmp_ret = OB_SUCCESS; + } else { + LOG_WARN("fail to get vector index ls mgr", KR(tmp_ret), K(tenant_id_), K(ls_->get_ls_id())); + } + } + if (OB_NOT_NULL(mgr)) { + mgr->dump_all_inst(); // for debug, remove later + } + } + + return ret; +} + +int ObPluginVectorIndexLoadScheduler::log_tablets_need_memdata_sync(ObPluginVectorIndexMgr *mgr) +{ + // Notice: only sync complete adapter, partial adapter will be merged to complete next timer schedule + int ret = OB_SUCCESS; + cb_.table_id_array_.reuse(); + cb_.tablet_id_array_.reuse(); + + // follower just refresh adapter statistics, leader submit log need memdata sync + + FOREACH_X(iter, mgr->get_complete_adapter_map(), OB_SUCC(ret)) { + ObPluginVectorIndexAdaptor *adapter = iter->second; + bool need_sync = false; + if (OB_ISNULL(adapter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null adapter", KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } else if (iter->first != adapter->get_inc_tablet_id()) { + // do nothing + } else if (cb_.tablet_id_array_.count() >= ObVectorIndexSyncLogCb::VECTOR_INDEX_MAX_SYNC_COUNT) { + // do nothing, wait for next schedule + } else if (OB_FAIL(adapter->check_need_sync_to_follower(need_sync))) { + LOG_WARN("fail to check need memdata sync", KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } else if (need_sync && is_leader_) { + if (OB_FAIL(cb_.tablet_id_array_.push_back(iter->first))) { + LOG_WARN("fail to push tablet id need memdata sync", KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } else if (OB_FAIL(cb_.table_id_array_.push_back(adapter->get_inc_table_id()))) { + LOG_WARN("fail to push table id need memdata sync", KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } + } + } + + if (OB_FAIL(ret) || !is_leader_) { + // do nothing + } else if (cb_.tablet_id_array_.count() > 0) { + if (OB_FAIL(submit_log_())) { + TRANS_LOG(WARN, "fail to submit vector index memdata sync log",KR(ret), K(tenant_id_), 
K(ls_->get_ls_id())); + } else { + TRANS_LOG(INFO, "submit vector index memdata sync log success", KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } + } + return ret; +} + +int ObPluginVectorIndexLoadScheduler::execute_all_memdata_sync_task(ObPluginVectorIndexMgr *mgr) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null mgr", KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } else { + // other threads will not process current set, it is save to just use iter + VectorIndexMemSyncMap ¤t_map = mgr->get_processing_map(); + FOREACH(iter, current_map) { + // sync_task could countinue even if ls role change + // however forced sync is not need by leader ls + if (OB_FAIL(execute_one_memdata_sync_task(mgr, iter->second))) { + LOG_WARN("fail to execute_one_memdata_sync_task", KR(ret), K(iter->first)); + } + } + } + return ret; +} + +int ObPluginVectorIndexLoadScheduler::check_and_execute_memdata_sync_task(ObPluginVectorIndexMgr *mgr) +{ + int ret = OB_SUCCESS; + bool need_mem_data_sync = false; + bool force_mem_data_sync = false; + if (OB_ISNULL(mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null mgr", KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } else if (OB_FAIL(mgr->check_need_mem_data_sync_task(need_mem_data_sync))) { + LOG_WARN("fail to check need mem data sync task", + KR(ret), K(mgr->get_ls_task_ctx()), K(tenant_id_), K(ls_->get_ls_id())); + } else if (need_mem_data_sync) { + mgr->get_ls_task_ctx().non_memdata_task_cycle_ = 0; + mgr->get_ls_task_ctx().need_memdata_sync_ = true; + } else { + mgr->get_ls_task_ctx().non_memdata_task_cycle_++; + if (mgr->get_ls_task_ctx().non_memdata_task_cycle_ + > ObPluginVectorIndexLSTaskCtx::NON_MEMDATA_TASK_CYCLE_MAX) { + // too long to receive any sync task log, sync once forcely + // save all tablet id to current refresh task recorder + mgr->get_ls_task_ctx().non_memdata_task_cycle_ = 0; + mgr->get_ls_task_ctx().need_memdata_sync_ = true; + force_mem_data_sync = true; + } + } + 
+ + if (OB_SUCC(ret) + && force_mem_data_sync + && (current_memory_config_ != 0) + && !is_leader_) { + // push all local tablet to sync candidate + VectorIndexMemSyncMap ¤t_map = mgr->get_processing_map(); + FOREACH(iter, mgr->get_complete_adapter_map()) { + // only use complete adapter, tablet id of no.3 aux index table + ObPluginVectorIndexAdaptor *adapter = iter->second; + ObTabletID tablet_id = iter->first; + if (tablet_id == adapter->get_inc_tablet_id()) { + if (adapter->get_sync_idle_count() == 0) { + adapter->inc_sync_idle_count(); + } else { + // generate one task + char *task_ctx_buf = + static_cast(mgr->get_task_allocator().alloc(sizeof(ObPluginVectorIndexTaskCtx))); + ObPluginVectorIndexTaskCtx* task_ctx = nullptr; + if (OB_ISNULL(task_ctx_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc memdata sync task ctx", KR(ret)); + } else if (FALSE_IT(task_ctx = new(task_ctx_buf)ObPluginVectorIndexTaskCtx(tablet_id, adapter->get_inc_table_id()))) { + } else if (OB_FAIL(current_map.set_refactored(tablet_id, task_ctx))) { + LOG_WARN("failed to set vector index memdata sync task ctx", K(ret), K(tablet_id), KPC(task_ctx)); + } + if (OB_FAIL(ret) && OB_NOT_NULL(task_ctx)) { + task_ctx->~ObPluginVectorIndexTaskCtx(); + mgr->get_task_allocator().free(task_ctx); + task_ctx = nullptr; + } + } + } + } + } + + if (OB_SUCC(ret) && mgr->get_ls_task_ctx().need_memdata_sync_) { + if (OB_FAIL(execute_all_memdata_sync_task(mgr))) { + if (OB_SIZE_OVERFLOW != ret) { + LOG_WARN("fail to try schedule memedata_sync dag task", KR(ret)); + } else { + ret = OB_SUCCESS; + } + } + check_ls_task_state(mgr); + } else { + // do nothing + } + + return ret; +} + +int ObPluginVectorIndexLoadScheduler::check_and_execute_tasks() +{ + int ret = OB_SUCCESS; + ObTimeGuard guard("ObPluginVectorIndexLoadScheduler::check_and_handle_event", + VEC_INDEX_LOAD_TIME_NORMAL_THRESHOLD); + ObPluginVectorIndexMgr *index_ls_mgr = nullptr; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + 
LOG_WARN("tablet ttl manager not init", KR(ret)); + } else if (OB_FAIL(vector_index_service_->get_ls_index_mgr_map().get_refactored(ls_->get_ls_id(), index_ls_mgr))) { + if (OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("fail to get vector index ls mgr", KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } + } else if (OB_ISNULL(index_ls_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid vector index ls mgr", KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } + + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(index_ls_mgr) && index_ls_mgr->get_ls_task_ctx().state_ != OB_TTL_TASK_RUNNING) { + // do nothing, ToDo: change log level later + LOG_INFO("not vector index schedular running", + K(index_ls_mgr->get_ls_task_ctx().state_), K(ls_->get_ls_id())); + } else { + // Notice: index_ls_mgr maybe null + // create / remove adapter, check need update & write mem sync log + if (OB_FAIL(check_and_execute_adapter_maintenance_task(index_ls_mgr))) { + LOG_WARN("fail to check and execute adapter maintenance task", + KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } + // Notice: leader write sync log, do memdata_sync only one loop(role changed from follower to leader) + // explicit cover error code + ret = OB_SUCCESS; + // write tablets need memdata sync to clog + if (OB_NOT_NULL(index_ls_mgr) + && (current_memory_config_ != 0) + && OB_FAIL(log_tablets_need_memdata_sync(index_ls_mgr))) { + LOG_WARN("fail to log tablets need memdata sync", KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } + + // explicit cover error code + ret = OB_SUCCESS; + // mem_sync task + if (OB_NOT_NULL(index_ls_mgr) && OB_FAIL(check_and_execute_memdata_sync_task(index_ls_mgr))) { + LOG_WARN("fail to check and execute memdata sync task", + KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } + } + return ret; +} + +void ObPluginVectorIndexLoadScheduler::run_task() +{ + ObCurTraceId::init(GCONF.self_addr_); + ObTimeGuard guard("ObPluginVectorIndexLoadScheduler::run_task", + 
VEC_INDEX_LOAD_TIME_NORMAL_THRESHOLD); + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("vector index load task not inited", KR(ret)); + } else if (!ObTTLUtil::check_can_process_tenant_tasks(tenant_id_)) { + // check ObMultiVersionSchemaService ready + LOG_INFO("schema service not ready", KR(ret)); + } else if (ATOMIC_BCAS(&need_do_for_switch_, true, false)) { + // reserved, do nothing + LOG_INFO("switch leader", K(tenant_id_), K(ls_->get_ls_id()), K(is_leader_), K(is_stopped_)); + } else if (check_can_do_work()){ + if (OB_FAIL(check_tenant_memory())) { + LOG_WARN("check vector index resource failed", KR(ret)); + } else if (OB_FAIL(reload_tenant_task())) { + LOG_WARN("fail to reload tenant task", KR(ret)); + } else if (OB_FAIL(check_and_execute_tasks())) { + LOG_WARN("fail to scan and handle all tenant event", KR(ret)); + } + } +} + +OB_SERIALIZE_MEMBER(ObVectorIndexSyncLog, flags_, tablet_id_array_, table_id_array_) + +int ObPluginVectorIndexLoadScheduler::submit_log_() +{ + int ret = OB_SUCCESS; + if (cb_.tablet_id_array_.count() == 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get empty tablet id array", KR(ret)); + } else { + ObVectorIndexSyncLog ls_log(cb_.tablet_id_array_, cb_.table_id_array_); + palf::LSN lsn; + SCN base_scn = SCN::min_scn(); + SCN scn; + logservice::ObLogBaseHeader + base_header(logservice::ObLogBaseType::VEC_INDEX_LOG_BASE_TYPE, + logservice::ObReplayBarrierType::NO_NEED_BARRIER); // no need reply hint + uint32_t log_size = base_header.get_serialize_size() + ls_log.get_serialize_size(); + if (log_size > ObVectorIndexSyncLogCb::VECTOR_INDEX_SYNC_LOG_MAX_LENGTH) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("log size is too large", KR(ret), K(log_size), K(cb_.tablet_id_array_.count())); + } else if (OB_ISNULL(cb_.log_buffer_)) { + cb_.log_buffer_ = static_cast(ob_malloc(ObVectorIndexSyncLogCb::VECTOR_INDEX_SYNC_LOG_MAX_LENGTH, + ObMemAttr(tenant_id_, + "VEC_INDEX_LOG"))); + cb_.log_buffer_len_ = 
ObVectorIndexSyncLogCb::VECTOR_INDEX_SYNC_LOG_MAX_LENGTH; + if (OB_ISNULL(cb_.log_buffer_)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc vec index memdata sync log buffer", KR(ret), K(log_size)); + } + } + + cb_.pos_ = 0; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(base_header.serialize(cb_.log_buffer_, cb_.log_buffer_len_, cb_.pos_))) { + TRANS_LOG(WARN, "ObVectorIndexSyncLog serialize base header error", + KR(ret), KP(cb_.log_buffer_), K(cb_.log_buffer_len_), K(cb_.pos_)); + } else if (OB_FAIL(ls_log.serialize(cb_.log_buffer_, cb_.log_buffer_len_, cb_.pos_))) { + TRANS_LOG(WARN, "ObVectorIndexSyncLog serialize vec index memdata sync log error", + KR(ret), KP(cb_.log_buffer_), K(cb_.log_buffer_len_), K(cb_.pos_)); + } else if (OB_FAIL(ls_->get_log_handler()->append(cb_.log_buffer_, + cb_.pos_, + base_scn, + false, + false, + &cb_, + lsn, + scn))) { + cb_.reset(); + TRANS_LOG(WARN, "vector index memdata sync log submit error", + KR(ret), KP(cb_.log_buffer_), K(cb_.pos_)); + } else { + TRANS_LOG(INFO, "submit vector index memdata sync log success", + K(tenant_id_), K(ls_->get_ls_id()), K(base_scn), K(lsn), K(scn)); + } + cb_.tablet_id_array_.reuse(); + cb_.table_id_array_.reuse(); + } + return ret; +} + +// may not need +int ObPluginVectorIndexLoadScheduler::handle_submit_callback(const bool success, const share::SCN log_ts) +{ + int ret = OB_SUCCESS; + return ret; +} + +int ObPluginVectorIndexLoadScheduler::handle_replay_result(ObVectorIndexSyncLog &ls_log) +{ + int ret = OB_SUCCESS; + ObPluginVectorIndexMgr *mgr = nullptr; + if (OB_FAIL(vector_index_service_->acquire_vector_index_mgr(ls_->get_ls_id(), mgr))) { + LOG_WARN("fail to acquire vector index ls mgr", KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } else if (OB_ISNULL(mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid vector index ls mgr", KR(ret), K(tenant_id_), K(ls_->get_ls_id())); + } else { + VectorIndexMemSyncMap &waiting_task_map = mgr->get_waiting_map(); + for (int64_t i = 
0; OB_SUCC(ret) && i < ls_log.get_tablet_id_array().count(); i++) { + ObTabletID tablet_id = ls_log.get_tablet_id_array().at(i); + uint64_t table_id = ls_log.get_table_id_array().at(i); + char *task_ctx_buf = + static_cast(mgr->get_task_allocator().alloc(sizeof(ObPluginVectorIndexTaskCtx))); + ObPluginVectorIndexTaskCtx* task_ctx = nullptr; + + if (OB_ISNULL(task_ctx_buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc memdata sync task ctx", KR(ret)); + } else if (FALSE_IT(task_ctx = new(task_ctx_buf)ObPluginVectorIndexTaskCtx(tablet_id, table_id))) { + } else if (OB_FAIL(waiting_task_map.set_refactored(tablet_id, task_ctx))) { + if (ret != OB_HASH_EXIST) { + LOG_WARN("failed to set vector index memdata sync task ctx", K(ret), K(tablet_id), KPC(task_ctx)); + } else { + ret = OB_SUCCESS; + LOG_INFO("duplicate vector index memdata sync task ctx", K(ret), K(tablet_id), KPC(task_ctx)); + } + } else { + LOG_INFO("success get replay vector index memdata sync task ctx", K(ret), K(tablet_id), KPC(task_ctx)); + } + if (OB_FAIL(ret) && OB_NOT_NULL(task_ctx)) { + task_ctx->~ObPluginVectorIndexTaskCtx(); + mgr->get_task_allocator().free(task_ctx); // not really free + task_ctx = nullptr; + } + } + } + + return ret; +} + +int ObPluginVectorIndexLoadScheduler::replay(const void *buffer, + const int64_t buf_size, + const palf::LSN &lsn, + const share::SCN &log_scn) +{ + int ret = OB_SUCCESS; + logservice::ObLogBaseHeader base_header; + int64_t tmp_pos = 0; + const char *log_buf = static_cast(buffer); + ObVectorIndexTabletIDArray tmp_tablet_id_array_; + ObVectorIndexTableIDArray tmp_table_id_array_; + ObVectorIndexSyncLog ls_log(tmp_tablet_id_array_, tmp_table_id_array_); + + // need ls, and mgr + if (OB_FAIL(base_header.deserialize(log_buf, buf_size, tmp_pos))) { + TRANS_LOG(WARN, "log base header deserialize error", K(ret), KP(buffer), K(buf_size), K(lsn), K(log_scn)); + } else if (OB_FAIL(ls_log.deserialize((char *)buffer, buf_size, tmp_pos))) { + 
TRANS_LOG(WARN, "desrialize tx_log_body error", K(ret), KP(buffer), K(buf_size), K(lsn), K(log_scn)); + } else if (OB_FAIL(handle_replay_result(ls_log))) { + TRANS_LOG(WARN, "handle replay result fail", K(ret), K(ls_log), K(log_scn)); + } else { + // do nothing + } + LOG_INFO("ObPluginVectorIndexLoadScheduler replay", K(ret), K(ls_log), K(base_header)); // debug log + return ret; +} + +// checkpoint interfaces +int ObPluginVectorIndexLoadScheduler::flush(share::SCN &scn) +{ + UNUSED(scn); + return OB_SUCCESS; +} + +share::SCN ObPluginVectorIndexLoadScheduler::get_rec_scn() +{ + return share::SCN::max_scn(); +} + +// role change interfaces + +int ObPluginVectorIndexLoadScheduler::switch_to_leader() +{ + int64_t start_time_us = ObTimeUtility::current_time(); + FLOG_INFO("vector index scheduler: begin to switch_to_leader", K_(tenant_id), KPC_(ls), K(start_time_us)); + int ret = OB_SUCCESS; + if (!is_inited_) { + ret = OB_NOT_INIT; + LOG_WARN("vector index load scheduler not inited", KR(ret)); + } else { + ATOMIC_STORE(&is_leader_, true); + ATOMIC_STORE(&need_do_for_switch_, true); + } + const int64_t cost_us = ObTimeUtility::current_time() - start_time_us; + FLOG_INFO("vector index scheduler: finish to switch_to_leader", KR(ret), K_(tenant_id), KPC_(ls), K(cost_us)); + return ret; +} + +int ObPluginVectorIndexLoadScheduler::switch_to_follower_gracefully() +{ + int ret = OB_SUCCESS; + inner_switch_to_follower_(); + return ret; +} + +void ObPluginVectorIndexLoadScheduler::switch_to_follower_forcedly() +{ + inner_switch_to_follower_(); +} + +void ObPluginVectorIndexLoadScheduler::inner_switch_to_follower_() +{ + FLOG_INFO("vector index scheduler: begin to switch_to_follower", K_(tenant_id), KPC_(ls)); + const int64_t start_time_us = ObTimeUtility::current_time(); + ATOMIC_STORE(&is_leader_, false); + ATOMIC_STORE(&need_do_for_switch_, true); + const int64_t cost_us = ObTimeUtility::current_time() - start_time_us; + FLOG_INFO("vector index scheduler: finish to 
switch_to_follower", K_(tenant_id), KPC_(ls), K(cost_us)); +} + +int ObPluginVectorIndexLoadScheduler::safe_to_destroy(bool &is_safe) +{ + int ret = OB_SUCCESS; + is_safe = true; + int64_t dag_ref = get_dag_ref(); + if (0 != dag_ref) { + if (REACH_TIME_INTERVAL(60L * 1000000)) { // 60s + LOG_WARN("vector index scheduler can't destroy", K(dag_ref)); + } + is_safe = false; + } + return ret; +} + +// ------ implement mem sync task ------ +int ObVectorIndexDag::init(ObPluginVectorIndexMgr *mgr, ObPluginVectorIndexTaskCtx *task_ctx) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", KR(ret)); + } else if (OB_ISNULL(mgr) || OB_ISNULL(task_ctx)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), KP(mgr), KP(task_ctx)); + } else { + compat_mode_ = lib::Worker::CompatMode::MYSQL; // only support mysql now + param_.tenant_id_ = mgr->get_tenant_id(); + param_.ls_id_ = mgr->get_ls_id(); + param_.table_id_ = task_ctx->index_table_id_; + param_.tablet_id_ = task_ctx->index_tablet_id_; + param_.task_ctx_ = task_ctx; + + is_inited_ = true; + } + return ret; +} + +bool ObVectorIndexDag::operator==(const ObIDag& other) const +{ + bool is_equal = false; + if (OB_UNLIKELY(this == &other)) { + is_equal = true; + } else if (get_type() == other.get_type()) { + const ObVectorIndexDag &other_dag = static_cast(other); + if (OB_UNLIKELY(!param_.is_valid() || !other_dag.param_.is_valid())) { + LOG_ERROR_RET(OB_ERR_SYS, "invalid argument", K_(param), K(other_dag.param_)); + } else { + is_equal = (param_ == other_dag.param_); + } + } + return is_equal; +} + +int64_t ObVectorIndexDag::hash() const +{ + int64_t hash_value = 0; + if (OB_UNLIKELY(!is_inited_ || !param_.is_valid())) { + LOG_ERROR_RET(OB_ERR_SYS, "invalid argument", K(is_inited_), K_(param)); + } else { + hash_value = common::murmurhash(¶m_.tenant_id_, sizeof(param_.tenant_id_), hash_value); + hash_value += param_.ls_id_.hash(); + hash_value += 
common::murmurhash(&param_.table_id_, sizeof(param_.table_id_), hash_value); // table_id_ takes part in operator==, so it feeds the hash instead of tenant_id_ a second time + hash_value += param_.tablet_id_.hash(); + } + return hash_value; +} + +int ObVectorIndexDag::fill_dag_key(char *buf, const int64_t buf_len) const +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObVectorIndexDag has not been initialized", K(is_inited_), K_(param)); + } else if (OB_FAIL(databuff_printf(buf, buf_len, "vector index memdata sync task: " + "tenant_id = %ld, ls_id = %ld, table_id = %ld, tablet_id = %ld", + param_.tenant_id_, + param_.ls_id_.id(), + param_.table_id_, + param_.tablet_id_.id()))) { + LOG_WARN("fail to fill dag key", KR(ret), K(param_)); + } + return ret; +} + +int ObVectorIndexDag::fill_info_param(compaction::ObIBasicInfoParam *&out_param, ObIAllocator &allocator) const +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObVectorIndexDag has not been initialized", K(is_inited_), K_(param)); + } else if (OB_FAIL(ADD_DAG_WARN_INFO_PARAM(out_param, allocator, get_type(), + static_cast(param_.tenant_id_), + static_cast(param_.ls_id_.id()), + static_cast(param_.table_id_), + static_cast(param_.tablet_id_.id())))) { + LOG_WARN("fail to fill info param", KR(ret), K_(param)); + } + return ret; +} + +int ObVectorIndexTask::init(ObPluginVectorIndexLoadScheduler *schedular, + ObPluginVectorIndexMgr *mgr, + ObPluginVectorIndexTaskCtx *task_ctx) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", KR(ret)); + } else if (OB_ISNULL(schedular) || OB_ISNULL(mgr) || OB_ISNULL(task_ctx)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), KP(schedular), KP(mgr), KP(task_ctx)); + } else { + ls_id_ = mgr->get_ls_id(); + vec_idx_scheduler_ = schedular; + vec_idx_mgr_ = mgr; + task_ctx_ = task_ctx; + read_snapshot_.reset(); + is_inited_ = true; + } + return ret; +} + +int ObVectorIndexTask::process() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; +
LOG_WARN("vector index task has not been initialized", K(is_inited_)); + } else if (OB_ISNULL(vec_idx_scheduler_) || OB_ISNULL(vec_idx_mgr_) || OB_ISNULL(task_ctx_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), KP(vec_idx_scheduler_), KP(vec_idx_mgr_), KP(task_ctx_)); + } else if (vec_idx_scheduler_->is_stopped()) { + common::ObSpinLockGuard ctx_guard(task_ctx_->lock_); + task_ctx_->err_code_ = OB_SUCCESS; + task_ctx_->task_status_ = OB_TTL_TASK_FINISH; + LOG_INFO("vec index scheduler is stopped, memdata sync task mark finish", KR(ret), KPC(task_ctx_)); + } else { + bool need_stop = false; + + while(!need_stop && OB_SUCC(ret)) { + // need set context? should set attr in constructor + lib::ContextParam param; + param.set_mem_attr(MTL_ID(), "VecIdxTaskCtx", ObCtxIds::DEFAULT_CTX_ID) // MTL here is the dag's MTL + .set_properties(lib::USE_TL_PAGE_OPTIONAL); + CREATE_WITH_TEMP_CONTEXT(param) { + if (OB_FAIL(process_one())) { + LOG_WARN("fail to process one", KR(ret), K(ls_id_), KPC(task_ctx_)); + } + allocator_.reuse(); + ret = OB_SUCCESS; // continue to try to schedule remaining tasks + + if (OB_FAIL(vec_idx_scheduler_->check_task_state(vec_idx_mgr_, task_ctx_, need_stop))) { + LOG_WARN("fail to check task state", KR(ret), K(ls_id_), KPC(task_ctx_)); + ret = OB_SUCCESS; // continue to try to schedule remaining tasks + } + } + } + } + // the scheduler pointer is still null when init() never succeeded, so guard the ref release + if (OB_NOT_NULL(vec_idx_scheduler_)) { + vec_idx_scheduler_->dec_dag_ref(); + } + return ret; +} + +int ObVectorIndexTask::process_one() +{ + int ret = OB_SUCCESS; + int64_t start_time = ObTimeUtil::current_time(); + ObPluginVectorIndexAdapterGuard adpt_guard; + + if (OB_FAIL(ObPluginVectorIndexUtils::get_task_read_snapshot(ls_id_, read_snapshot_))) { + LOG_WARN("fail to get task read snapshot", KR(ret), K(ls_id_), KPC(task_ctx_)); + } else if (OB_FAIL(vec_idx_mgr_->get_adapter_inst_guard(task_ctx_->index_tablet_id_, adpt_guard))) { + LOG_WARN("fail to get adapter instance", KR(ret), K(ls_id_), KPC(task_ctx_)); + } else if (OB_FAIL(ObPluginVectorIndexUtils::refresh_memdata(ls_id_, + adpt_guard.get_adatper(), + read_snapshot_, +
allocator_))) { + LOG_WARN("fail to refresh memdata", KR(ret), K(ls_id_), KPC(task_ctx_)); + } + + if (OB_SUCC(ret)) { + task_ctx_->err_code_ = OB_SUCCESS; + adpt_guard.get_adatper()->sync_finish(); + adpt_guard.get_adatper()->reset_sync_idle_count(); + } else { + task_ctx_->err_code_ = ret; + if (OB_NOT_NULL(adpt_guard.get_adatper())) { + adpt_guard.get_adatper()->sync_finish(); + adpt_guard.get_adatper()->sync_fail(); + adpt_guard.get_adatper()->reset_sync_idle_count(); + } + } + + int64_t cost = ObTimeUtil::current_time() - start_time; + LOG_INFO("finish process one", KR(ret), K(cost), K(ls_id_), KPC(task_ctx_)); // change to debug later + + return ret; +} + +} +} \ No newline at end of file diff --git a/src/share/vector_index/ob_plugin_vector_index_scheduler.h b/src/share/vector_index/ob_plugin_vector_index_scheduler.h new file mode 100644 index 0000000000..ed5ee35bdf --- /dev/null +++ b/src/share/vector_index/ob_plugin_vector_index_scheduler.h @@ -0,0 +1,427 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_OBSERVER_OB_PLUGIN_VECTOR_INDEX_SCHEDULER_DEFINE_H_ +#define OCEANBASE_OBSERVER_OB_PLUGIN_VECTOR_INDEX_SCHEDULER_DEFINE_H_ +#include "share/ob_ls_id.h" +#include "share/scn.h" +#include "lib/lock/ob_recursive_mutex.h" +#include "share/rc/ob_tenant_base.h" +#include "share/vector_index/ob_plugin_vector_index_adaptor.h" +#include "observer/table/ttl/ob_tenant_ttl_manager.h" +#include "logservice/ob_append_callback.h" +#include "logservice/ob_log_base_type.h" +#include "logservice/ob_log_handler.h" + +namespace oceanbase +{ +namespace share +{ + +class ObPluginVectorIndexService; +class ObPluginVectorIndexMgr; + +static const int64_t VECTOR_INDEX_TABLET_ID_COUNT = 100; +typedef ObSEArray ObVectorIndexTabletIDArray; +typedef ObSEArray ObVectorIndexTableIDArray; +class ObVectorIndexSyncLog +{ +public: + OB_UNIS_VERSION(1); + +public: + ObVectorIndexSyncLog(ObVectorIndexTabletIDArray &array, ObVectorIndexTableIDArray &table_id_array) + : flags_(0), + tablet_id_array_(array), + table_id_array_(table_id_array) + {} + TO_STRING_KV(K_(flags), K_(tablet_id_array), K_(table_id_array)); + ObVectorIndexTabletIDArray &get_tablet_id_array() { return tablet_id_array_; } + ObVectorIndexTableIDArray &get_table_id_array() { return table_id_array_; } + +private: + uint32_t flags_; + ObVectorIndexTabletIDArray &tablet_id_array_; + ObVectorIndexTableIDArray &table_id_array_; +}; + +class ObVectorIndexSyncLogCb : public logservice::AppendCb +{ +public: + ObVectorIndexSyncLogCb() + : log_buffer_(nullptr), + log_buffer_len_(0), + pos_(0) + { + reset(); + } + + ~ObVectorIndexSyncLogCb() { + destory(); + } + + void reset() + { + ATOMIC_SET(&is_callback_invoked_, false); + ATOMIC_SET(&is_success_, false); + } + + void destory() + { + if (OB_NOT_NULL(log_buffer_)) { + ob_free(log_buffer_); + log_buffer_ = nullptr; + } + } + + virtual int on_success() override + { + ATOMIC_SET(&is_success_, true); + MEM_BARRIER(); + ATOMIC_SET(&is_callback_invoked_, true); + return 
OB_SUCCESS; + } + virtual int on_failure() override + { + ATOMIC_SET(&is_callback_invoked_, true); + return OB_SUCCESS; + } + + TO_STRING_KV(K_(is_callback_invoked), K_(is_success), K_(tablet_id_array), K_(table_id_array), + KP_(log_buffer), K_(log_buffer_len), K_(pos)); + OB_INLINE bool is_invoked() const { return ATOMIC_LOAD(&is_callback_invoked_); } + OB_INLINE bool is_success() const { return ATOMIC_LOAD(&is_success_); } + +public: + ObVectorIndexTabletIDArray tablet_id_array_; + ObVectorIndexTableIDArray table_id_array_; + static const uint32 VECTOR_INDEX_SYNC_LOG_MAX_LENGTH = 16 * 1024; // Max 16KB each log + static const uint32_t VECTOR_INDEX_MAX_SYNC_COUNT = 512; + char *log_buffer_; + uint32_t log_buffer_len_; + int64_t pos_; + +private: + bool is_callback_invoked_; + bool is_success_; + +}; + +typedef common::ObTTLTaskStatus ObVectorIndexTaskStatus; +typedef common::ObTTLStatus ObVectorIndexTenantStatus; + +// task context of a tenant +class ObPluginVectorIndexTenantTaskCtx +{ +public: + ObPluginVectorIndexTenantTaskCtx() + : need_check_(false), + is_dirty_(false), + state_(common::ObTTLTaskStatus::OB_TTL_TASK_INVALID) + {} + + virtual ~ObPluginVectorIndexTenantTaskCtx() {} + void reuse() + { + need_check_ = false; + is_dirty_ = false; + state_ = common::ObTTLTaskStatus::OB_TTL_TASK_FINISH; + } + + TO_STRING_KV(K_(need_check), K_(is_dirty), K_(state)); + +public: + bool need_check_; + bool is_dirty_; + ObVectorIndexTaskStatus state_; +}; + +// task context of a ls +struct ObPluginVectorIndexLSTaskCtx +{ + void reuse() + { + need_check_ = false; + all_finished_ = false; + state_ = common::ObTTLTaskStatus::OB_TTL_TASK_FINISH; + } + + TO_STRING_KV(K_(task_id), K_(need_check), K_(all_finished), K_(state)); + static const uint32 NON_MEMDATA_TASK_CYCLE_MAX = 90; // check force memdata sync every 15 mins + uint32 non_memdata_task_cycle_; + bool need_memdata_sync_; + int64_t task_id_; + bool need_check_; + bool all_finished_; + ObVectorIndexTaskStatus state_; +}; 
+ +// memdata sync task ctx +struct ObPluginVectorIndexTaskCtx +{ + ObPluginVectorIndexTaskCtx(ObTabletID &index_tablet_id, uint64_t index_table_id) + : index_tablet_id_(index_tablet_id), + index_table_id_(index_table_id), + task_start_time_(0), + last_modify_time_(0), + failure_times_(0), + err_code_(OB_SUCCESS), + in_queue_(false), + task_status_(ObVectorIndexTaskStatus::OB_TTL_TASK_PREPARE) + {} + TO_STRING_KV(K_(index_tablet_id), K_(index_table_id), K_(task_start_time), K_(last_modify_time), + K_(failure_times), K_(in_queue), K_(task_status)); + ObTabletID index_tablet_id_; + uint64_t index_table_id_; + int64_t task_start_time_; + int64_t last_modify_time_; + int64_t failure_times_; + int64_t err_code_; + bool in_queue_; // whether in dag queue or not + ObVectorIndexTaskStatus task_status_; + common::ObSpinLock lock_; // lock for update task_status_ +}; +typedef hash::ObHashMap ObVecIdxSharedTableInfoMap; + +// schedule vector tasks for a ls +class ObPluginVectorIndexLoadScheduler : public common::ObTimerTask, + public logservice::ObIReplaySubHandler, + public logservice::ObICheckpointSubHandler, + public logservice::ObIRoleChangeSubHandler +{ +public: + ObPluginVectorIndexLoadScheduler() + : is_inited_(false), + is_leader_(true), + need_do_for_switch_(false), + is_stopped_(false), + tenant_id_(OB_INVALID_TENANT_ID), + ttl_tablet_timer_tg_id_(0), + interval_factor_(1), + basic_period_(VEC_INDEX_SCHEDULAR_BASIC_PERIOD), + current_memory_config_(0), + dag_ref_cnt_(0), + vector_index_service_(nullptr), + ls_(nullptr), + local_schema_version_(OB_INVALID_VERSION), + local_tenant_task_(), + cb_() + {} + virtual ~ObPluginVectorIndexLoadScheduler() + { + } + + int init(uint64_t tenant_id, ObLS *ls, int ttl_tablet_timer_tg_id_); + virtual void runTimerTask() override; + void run_task(); + bool is_inited() { return is_inited_; } + + ObPluginVectorIndexService *get_vector_index_service() { return vector_index_service_; } + + bool check_can_do_work(); + int 
check_tenant_memory(); + int check_schema_version(); + void mark_tenant_need_check(); + void mark_tenant_checked(); + int reload_tenant_task(); + int check_and_execute_tasks(); // was check_and_handle_event + int check_and_execute_adapter_maintenance_task(ObPluginVectorIndexMgr *&mgr); + int check_and_execute_memdata_sync_task(ObPluginVectorIndexMgr *mgr); + int sync_all_dirty_task(ObIArray& dirty_tasks); + int generate_batch_tablet_task(); + + // core interfaces + int execute_adapter_maintenance(); + int acquire_adapter_in_maintenance(const int64_t table_id, const ObTableSchema *table_schema); + int set_shared_table_info_in_maintenance(const int64_t table_id, + const ObTableSchema *table_schema, + ObVecIdxSharedTableInfoMap &shared_table_info_map); + int check_task_state(ObPluginVectorIndexMgr *mgr, ObPluginVectorIndexTaskCtx *task_ctx, bool &is_stop); + int check_is_vector_index_table(const ObTableSchema &table_schema, + bool &is_vector_index_table, + bool &is_shared_index_table); + void clean_deprecated_adapters(); + int check_parital_index_adpter_exist(ObPluginVectorIndexMgr *mgr); + + int log_tablets_need_memdata_sync(ObPluginVectorIndexMgr *mgr); + int execute_all_memdata_sync_task(ObPluginVectorIndexMgr *mgr); + int execute_one_memdata_sync_task(ObPluginVectorIndexMgr *mgr, ObPluginVectorIndexTaskCtx *ctx); + int check_ls_task_state(ObPluginVectorIndexMgr *mgr); + + // task generation interfaces + bool can_schedule_tenant(const ObPluginVectorIndexMgr *mgr); + bool can_schedule_task(const ObPluginVectorIndexTaskCtx *task_ctx); + int try_schedule_task(ObPluginVectorIndexMgr *mgr, ObPluginVectorIndexTaskCtx *task_ctx); + int try_schedule_remaining_tasks(ObPluginVectorIndexMgr *mgr, ObPluginVectorIndexTaskCtx *current_ctx); + int generate_vec_idx_memdata_dag(ObPluginVectorIndexMgr *mgr, ObPluginVectorIndexTaskCtx *task_ctx); + + // logger interfaces + int handle_submit_callback(const bool success, const share::SCN log_ts); + int 
handle_replay_result(ObVectorIndexSyncLog &ls_log); + int replay(const void *buffer, const int64_t buf_size, const palf::LSN &lsn, const share::SCN &log_scn); + + // checkpoint interfaces + int flush(share::SCN &scn); + share::SCN get_rec_scn(); + + // role change interfaces + int switch_to_follower_gracefully(); + void switch_to_follower_forcedly(); + int resume_leader() { return OB_SUCCESS; } + int switch_to_leader(); + + // task save destory + void stop() { is_stopped_= true; }; + bool is_stopped() { return (is_stopped_ == true); }; + void inc_dag_ref() { ATOMIC_INC(&dag_ref_cnt_); } + void dec_dag_ref() { ATOMIC_DEC(&dag_ref_cnt_); } + int64_t get_dag_ref() const { return ATOMIC_LOAD(&dag_ref_cnt_); } + + int safe_to_destroy(bool &is_safe); + + TO_STRING_KV(K_(is_inited), K_(is_leader), K_(need_do_for_switch), K_(is_stopped), + K_(tenant_id), K_(ttl_tablet_timer_tg_id), K_(interval_factor), + K_(basic_period), K_(current_memory_config), K_(dag_ref_cnt), + KP_(vector_index_service), KP_(ls), + K_(local_schema_version), K_(local_tenant_task)); + +private: + int submit_log_(); + void inner_switch_to_follower_(); + +private: + + static const int64_t VEC_INDEX_SCHEDULAR_BASIC_PERIOD = 10 * 1000 * 1000; // 10s + static const int64_t VEC_INDEX_LOAD_TIME_NORMAL_THRESHOLD = 30 * 1000 * 1000; // 30s + static const int64_t DEFAULT_TABLE_ARRAY_SIZE = 200; + static const int64_t TBALE_GENERATE_BATCH_SIZE = 200; + + // 1. is_leader_: Only leader is allowed to generate memdata sync logs, + // but execute of memdata sync task is allowed on leader/follower + // 2. need_do_for_switch_ is intended to skip some loops currently being executed, + // but in the context of vector indexing, only when leader to follwer need processing currently, + // which duplicates the function of is_leader_. + // 3. 
is_stopped_ is set only when the timer is stopped, stop to schedule memedata sync tasks + + bool is_inited_; + bool is_leader_; + bool need_do_for_switch_; + bool is_stopped_; + uint64_t tenant_id_; + int ttl_tablet_timer_tg_id_; + int interval_factor_; + int64_t basic_period_; + int64_t current_memory_config_; + volatile int64_t dag_ref_cnt_; + ObPluginVectorIndexService *vector_index_service_; + ObLS *ls_; + int64_t local_schema_version_; + ObPluginVectorIndexTenantTaskCtx local_tenant_task_; + ObVectorIndexSyncLogCb cb_; +}; + +class ObVectorIndexTask : public share::ObITask +{ +public: + ObVectorIndexTask() + : ObITask(ObITaskType::TASK_TYPE_VECTOR_INDEX_MEMDATA_SYNC), + is_inited_(false), + ls_id_(share::ObLSID::INVALID_LS_ID), + vec_idx_scheduler_(nullptr), + vec_idx_mgr_(nullptr), + task_ctx_(nullptr), + read_snapshot_(), + allocator_(ObMemAttr(MTL_ID(), "VecIdxTaskCtx")) + {} + ~ObVectorIndexTask() {}; + int init(ObPluginVectorIndexLoadScheduler *schedular, + ObPluginVectorIndexMgr *mgr, + ObPluginVectorIndexTaskCtx *task_ctx); + common::ObIAllocator &get_allocator() { return allocator_; } + virtual int process() override; + TO_STRING_KV(K_(is_inited), K_(ls_id), K_(read_snapshot), KPC_(task_ctx)); +private: + int process_one(); + +private: + bool is_inited_; + share::ObLSID ls_id_; + ObPluginVectorIndexLoadScheduler *vec_idx_scheduler_; + ObPluginVectorIndexMgr *vec_idx_mgr_; + ObPluginVectorIndexTaskCtx *task_ctx_; + SCN read_snapshot_; + common::ObArenaAllocator allocator_; + + DISALLOW_COPY_AND_ASSIGN(ObVectorIndexTask); +}; + +class ObVectorIndexTaskParam final +{ +public: + ObVectorIndexTaskParam() + : tenant_id_(OB_INVALID_ID), + ls_id_(share::ObLSID::INVALID_LS_ID), + table_id_(OB_INVALID_ID), + tablet_id_(common::OB_INVALID_ID), + task_ctx_(nullptr) + {} + ~ObVectorIndexTaskParam() {} + bool is_valid() const + { + return tenant_id_ != OB_INVALID_ID + && ls_id_.is_valid() + && table_id_ != OB_INVALID_ID + && tablet_id_.is_valid(); + } + bool 
operator==(const ObVectorIndexTaskParam& param) const + { + return tenant_id_ == param.tenant_id_ + && ls_id_ == param.ls_id_ + && table_id_ == param.table_id_ + && tablet_id_ == param.tablet_id_; + } + TO_STRING_KV(K_(ls_id), K_(tablet_id), KP_(task_ctx)); +public: + int64_t tenant_id_; + share::ObLSID ls_id_; + uint64_t table_id_; + common::ObTabletID tablet_id_; + ObPluginVectorIndexTaskCtx *task_ctx_; +}; + +class ObVectorIndexDag final: public share::ObIDag +{ +public: + ObVectorIndexDag() + : ObIDag(ObDagType::DAG_TYPE_VECTOR_INDEX), is_inited_(false), + param_(), + compat_mode_(lib::Worker::CompatMode::INVALID) + {} + virtual ~ObVectorIndexDag() {} + virtual bool operator==(const ObIDag& other) const override; + virtual int64_t hash() const override; + int init(ObPluginVectorIndexMgr *mgr, ObPluginVectorIndexTaskCtx *task_ctx); + virtual lib::Worker::CompatMode get_compat_mode() const override { return compat_mode_; } + virtual int fill_dag_key(char *buf, const int64_t buf_len) const override; + virtual int fill_info_param(compaction::ObIBasicInfoParam *&out_param, ObIAllocator &allocator) const override; + virtual uint64_t get_consumer_group_id() const override { return consumer_group_id_; } + virtual bool is_ha_dag() const { return false; } +private: + bool is_inited_; + ObVectorIndexTaskParam param_; + lib::Worker::CompatMode compat_mode_; + DISALLOW_COPY_AND_ASSIGN(ObVectorIndexDag); +}; + +} // namespace share +} // namespace oceanbase +#endif \ No newline at end of file diff --git a/src/share/vector_index/ob_plugin_vector_index_serialize.cpp b/src/share/vector_index/ob_plugin_vector_index_serialize.cpp new file mode 100644 index 0000000000..37b1e694a4 --- /dev/null +++ b/src/share/vector_index/ob_plugin_vector_index_serialize.cpp @@ -0,0 +1,340 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SHARE +#include "ob_plugin_vector_index_serialize.h" +#include "sql/engine/expr/ob_expr_lob_utils.h" +#include "storage/lob/ob_lob_manager.h" +#include "deps/oblib/src/lib/vector/ob_vector_util.h" +#include "share/vector_index/ob_vector_index_util.h" +#include "storage/access/ob_table_scan_iterator.h" + +namespace oceanbase +{ +namespace share +{ +/* + * ObOStreamBuf implement + * */ +std::streamsize ObOStreamBuf::xsputn(const char* s, std::streamsize count) +{ + std::streamsize written_size = 0; + std::streamsize left_size = 0; + while (is_valid() && is_success() && written_size < count) { + left_size = epptr() - pptr(); + std::streamsize sub_size = std::min(count - written_size, left_size); + MEMCPY(pptr(), s + written_size, sub_size); + pbump(static_cast(sub_size)); + written_size += sub_size; + if (written_size < count) { + last_error_code_ = do_callback(); + } + } + return written_size; +} + +ObOStreamBuf::int_type ObOStreamBuf::overflow(int_type ch) +{ + if (is_valid() && is_success()) { + if (ch != traits_type::eof()) { + *pptr() = traits_type::to_char_type(ch); + pbump(1); + } + last_error_code_ = do_callback(); + } + return ch; +} + +int ObOStreamBuf::do_callback() +{ + int ret = OB_SUCCESS; + int64_t data_size = pptr() - pbase(); + if (0 < data_size) { + if (OB_FAIL(cb_(pbase(), data_size, cb_param_))) { + LOG_WARN("failed to do callback", K(ret)); + } else { + setp(data_, data_ + capacity_ - 1); // reset to clear write buffer + } + } + return ret; +} + +void ObOStreamBuf::check_finish() +{ + if (is_valid() && is_success()) { + last_error_code_ = do_callback(); + } +} + +/* + * 
ObIStreamBuf implement + * */ +int ObIStreamBuf::init() +{ + int ret = OB_SUCCESS; + if (is_valid()) { + ret = OB_INIT_TWICE; + LOG_WARN("init istreambuf twice", K(ret)); + } else if (OB_FAIL(do_callback())) { + last_error_code_ = ret; + LOG_WARN("failed to do callback", K(ret)); + } + return ret; +} + +ObIStreamBuf::pos_type ObIStreamBuf::seekoff(off_type off, std::ios_base::seekdir dir, std::ios_base::openmode mode) +{ + UNUSED(mode); + pos_type ret = 0; + if (is_success()) { + if (!is_valid()) { + last_error_code_ = do_callback(); + } + if (is_valid() && is_success()) { + if (std::ios_base::cur == dir) { + gbump(static_cast(off)); + } else if (std::ios_base::end == dir) { + setg(eback(), egptr() + off, egptr()); + } else if (std::ios_base::beg == dir) { + setg(eback(), eback() + off, egptr()); + } + ret = gptr() - eback(); + } + } + return ret; +} + +ObIStreamBuf::pos_type ObIStreamBuf::seekpos(pos_type pos, std::ios_base::openmode mode) +{ + return seekoff(pos, std::ios_base::beg, mode); +} + +std::streamsize ObIStreamBuf::xsgetn(char* s, std::streamsize n) +{ + std::streamsize get_size = 0; + std::streamsize data_size = 0; + if (is_success() && !is_valid()) { + last_error_code_ = do_callback(); + } + while (is_valid() && is_success() && get_size < n) { + data_size = egptr() - gptr(); + std::streamsize sub_size = std::min(n - get_size, data_size); + MEMCPY(s + get_size, gptr(), sub_size); + gbump(static_cast(sub_size)); + get_size += sub_size; + if (get_size < n) { + last_error_code_ = do_callback(); + } + } + return get_size; +} + +ObIStreamBuf::int_type ObIStreamBuf::underflow() +{ + int_type ch = traits_type::eof(); + if (is_success()) { + if (!is_valid()) { + last_error_code_ = do_callback(); + } + if (is_success() && is_valid()) { + if (gptr() < egptr()) { // at least one readable char + ch = traits_type::to_int_type(*gptr()); + } else { + last_error_code_ = do_callback(); + if (is_success() && gptr() < egptr()) { + ch = traits_type::to_int_type(*gptr()); 
+ } + } + } + } + return ch; +} + +int ObIStreamBuf::do_callback() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(cb_(data_, capacity_, capacity_, cb_param_))) { // use data_ instead of eback() to change data_ + LOG_WARN("failed to do callback", K(ret)); + } else { + setg(data_, data_, data_ + capacity_); // fill the read buffer + } + return ret; +} +/* + * ObVectorIndexSerializer implement + * */ +int ObVectorIndexSerializer::serialize(void *index, ObOStreamBuf::CbParam &cb_param, ObOStreamBuf::Callback &cb, const int64_t capacity) +{ + int ret = OB_SUCCESS; + char *data = nullptr; + // ObOStreamBuf sets its put area to [data, data + capacity - 1), so a capacity below 2 + // leaves no room to buffer anything and xsputn could never make progress + if (OB_ISNULL(index) || capacity <= 1) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(index), K(capacity)); + } else if (OB_ISNULL(data = static_cast<char *>(allocator_.alloc(capacity * sizeof(char))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc serialize buffer", K(ret), K(capacity)); + } else { + ObOStreamBuf streambuf(data, capacity, cb_param, cb); + std::ostream out(&streambuf); + if (OB_FAIL(obvectorutil::fserialize(index, out))) { + LOG_WARN("fail to do vsag serialize", K(ret)); + } else { + streambuf.check_finish(); // do last callback to ensure all the data is written + if (OB_FAIL(streambuf.get_error_code())) { + LOG_WARN("failed to serialize", K(ret)); + } + } + } + return ret; +} + +int ObVectorIndexSerializer::deserialize(void *&index, ObIStreamBuf::CbParam &cb_param, ObIStreamBuf::Callback &cb) +{ + int ret = OB_SUCCESS; + char *data = nullptr; + if (OB_ISNULL(index)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(index)); + } else { + ObIStreamBuf streambuf(nullptr, 0, cb_param, cb); + std::istream in(&streambuf); + if (OB_FAIL(streambuf.init())) { + if (ret == OB_ITER_END) { + LOG_INFO("[vec index deserialize] read table is empty, just return"); + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to init istreambuf", K(ret)); + } + } else if (OB_FAIL(obvectorutil::fdeserialize(index, in))) { + LOG_WARN("fail to do
vsag deserialize", K(ret)); + } else if (OB_FAIL(streambuf.get_error_code())) { + if (ret == OB_ITER_END) { + LOG_INFO("[vec index deserialize] read table finish, just return"); + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to deserialize", K(ret)); + } + } + } + return ret; +} + +int ObHNSWDeserializeCallback::operator()(char*& data, const int64_t data_size, int64_t &read_size, share::ObIStreamBuf::CbParam &cb_param) +{ + UNUSED(data_size); + int ret = OB_SUCCESS; + blocksstable::ObDatumRow *row = nullptr; + ObDatum key_datum; + ObDatum data_datum; + ObHNSWDeserializeCallback::CbParam ¶m = static_cast(cb_param); + ObTableScanIterator *row_iter = static_cast(param.iter_); + ObIAllocator *alloactor = param.allocator_; + ObTextStringIter *&str_iter = param.str_iter_; + ObTextStringIterState state; + ObString src_block_data; + if (!param.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid row_iter", K(ret), K(row_iter)); + } else { + data = nullptr; + read_size = 0; + do { + if (OB_NOT_NULL(str_iter)) { + // try to get current next block + state = str_iter->get_next_block(src_block_data); + if (state == TEXTSTRING_ITER_NEXT) { + // get next block success + data = src_block_data.ptr(); + read_size = src_block_data.length(); + } else if (state == TEXTSTRING_ITER_END) { + // current lob is end, need to switch to next lob + // release current str iter + str_iter->~ObTextStringIter(); + alloactor->free(str_iter); + str_iter = nullptr; + } else { + ret = (str_iter->get_inner_ret() != OB_SUCCESS) ? 
+ str_iter->get_inner_ret() : OB_INVALID_DATA; + LOG_WARN("iter state invalid", K(ret), K(state), KPC(str_iter)); + // return error, release current str iter + str_iter->~ObTextStringIter(); + alloactor->free(str_iter); + str_iter = nullptr; + } + } + if (OB_SUCC(ret) && OB_ISNULL(str_iter)) { + // we should get next str_iter + if (OB_FAIL(row_iter->get_next_row(row))) { + LOG_WARN("failed to get next row", K(ret)); + } else if (OB_ISNULL(row) || row->get_column_count() < 2) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid row", K(ret), K(row)); + } else { + key_datum = row->storage_datums_[0]; + data_datum = row->storage_datums_[1]; + LOG_INFO("[vec index debug] show key and data for vsag deserialize", K(key_datum), K(data_datum)); + if (OB_ISNULL(str_iter = OB_NEWx(ObTextStringIter, alloactor, ObLongTextType, CS_TYPE_BINARY, data_datum.get_string(), true))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to new ObTextStringIter", KR(ret)); + } else if (OB_FAIL(str_iter->init(0, NULL, alloactor))) { + LOG_WARN("init lob str iter failed ", K(ret)); + } + } + } + } while (OB_SUCC(ret) && OB_ISNULL(data)); + } + return ret; +} + +int ObHNSWSerializeCallback::operator()(const char *data, const int64_t data_size, share::ObOStreamBuf::CbParam &cb_param) +{ + int ret = OB_SUCCESS; + ObLobLocatorV2 src_lob(const_cast(data), data_size, false); // data from vsag must has no header + ObHNSWSerializeCallback::CbParam ¶m = static_cast(cb_param); + ObVecIdxSnapshotDataWriteCtx *vctx = reinterpret_cast(param.vctx_); + ObLobManager *lob_mngr = MTL(ObLobManager*); + ObLobAccessParam lob_param; + lob_param.set_tmp_allocator(param.tmp_allocator_); + lob_param.allocator_ = param.allocator_; + lob_param.ls_id_ = vctx->get_ls_id(); + lob_param.tablet_id_ = vctx->get_data_tablet_id(); + lob_param.lob_meta_tablet_id_ = vctx->get_lob_meta_tablet_id(); + lob_param.lob_piece_tablet_id_ = vctx->get_lob_piece_tablet_id(); + lob_param.inrow_threshold_ = param.lob_inrow_threshold_; + 
lob_param.src_tenant_id_ = MTL_ID(); // 补数据不会跨租户 + lob_param.coll_type_ = CS_TYPE_BINARY; + lob_param.offset_ = 0; + lob_param.scan_backward_ = false; + lob_param.is_total_quantity_log_ = true; + lob_param.sql_mode_ = SMO_DEFAULT; + lob_param.timeout_ = param.timeout_; + lob_param.lob_common_ = nullptr; + lob_param.snapshot_ = *reinterpret_cast(param.snapshot_); + lob_param.tx_desc_ = reinterpret_cast(param.tx_desc_); + if (OB_ISNULL(lob_mngr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get lob manager nullptr", K(ret)); + } else if (OB_FAIL(lob_mngr->append(lob_param, src_lob))) { + LOG_WARN("lob append failed.", K(ret)); + } else { + LOG_INFO("[vec index debug] success write one data into lob tablet", K(src_lob), + K(lob_param.lob_meta_tablet_id_), KPC(lob_param.tx_desc_)); + ObString dest_str(lob_param.handle_size_, (char*)lob_param.lob_common_); + if (OB_FAIL(vctx->get_vals().push_back(dest_str))) { + LOG_WARN("fail to push dest lob into ctx val array", K(ret)); + } + } + return ret; +} + +}; +}; \ No newline at end of file diff --git a/src/share/vector_index/ob_plugin_vector_index_serialize.h b/src/share/vector_index/ob_plugin_vector_index_serialize.h new file mode 100644 index 0000000000..11b08bb8d7 --- /dev/null +++ b/src/share/vector_index/ob_plugin_vector_index_serialize.h @@ -0,0 +1,199 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ +#ifndef OCEANBASE_SHARE_PLUGIN_VECTOR_INDEX_SERIALIZE_H_ +#define OCEANBASE_SHARE_PLUGIN_VECTOR_INDEX_SERIALIZE_H_ +#include +#include "lib/function/ob_function.h" +#include "lib/allocator/page_arena.h" +#include "common/row/ob_row_iterator.h" +#include "share/ob_lob_access_utils.h" + + +namespace oceanbase +{ +namespace share +{ + + +class ObStreamBuf : public std::streambuf +{ +public: + explicit ObStreamBuf(char *data, const int64_t capacity) + : std::streambuf(), + capacity_(capacity), + data_(data), + last_error_code_(OB_SUCCESS) + {} + bool is_valid() const { return nullptr != data_; } + bool is_success() const { return OB_SUCCESS == last_error_code_; } + + TO_STRING_KV(K_(data), K_(capacity)); +protected: + int64_t capacity_; + char *data_; + int last_error_code_; +}; + +class ObOStreamBuf : public ObStreamBuf +{ +public: + struct CbParam + { + virtual ~CbParam() = default; + }; + using Callback = ObFunction; + explicit ObOStreamBuf(char *data, const int64_t capacity, CbParam &cb_param, Callback &cb) + : ObStreamBuf(data, capacity), + cb_param_(cb_param), + cb_(cb) + { + setp(data_, data_ + capacity_ - 1); + } + + void check_finish(); + int get_error_code() const { return last_error_code_; } + TO_STRING_KV(K(this)); + +protected: + virtual std::streamsize xsputn(const char* s, std::streamsize count) override; + virtual int_type overflow(int_type c) override; + +private: + int do_callback(); + +private: + CbParam &cb_param_; + Callback cb_; +}; + +class ObIStreamBuf : public ObStreamBuf +{ +public: + struct CbParam + { + virtual ~CbParam() = default; + }; + using Callback = ObFunction; + explicit ObIStreamBuf(char *data, const int64_t capacity, CbParam &cb_param, Callback &cb) + : ObStreamBuf(data, capacity), + cb_param_(cb_param), + cb_(cb) + { + setg(data_, data_, data_); + } + int init(); + int get_error_code() const { return last_error_code_; } + TO_STRING_KV(K_(data)); + +protected: + virtual std::streamsize xsgetn(char* s, std::streamsize n) 
override; + virtual int_type underflow() override; + + virtual pos_type seekoff(off_type off, std::ios_base::seekdir dir, + std::ios_base::openmode mode = std::ios_base::in | std::ios_base::out) override; + + virtual pos_type seekpos(pos_type pos, std::ios_base::openmode mode = std::ios_base::in | std::ios_base::out) override; + +private: + int do_callback(); + +private: + CbParam &cb_param_; + Callback cb_; +}; + +class ObHNSWDeserializeCallback { +public: + struct CbParam : public ObIStreamBuf::CbParam { + CbParam(ObNewRowIterator *iter, ObIAllocator *allocator) + : iter_(iter), allocator_(allocator), str_iter_(nullptr) + {} + CbParam() + : iter_(nullptr), + allocator_(nullptr), + str_iter_(nullptr) + {} + virtual ~CbParam() { + if (str_iter_ != nullptr) { + str_iter_->~ObTextStringIter(); + if (allocator_ != nullptr) { + allocator_->free(str_iter_); + } + str_iter_ = nullptr; + } + } + bool is_valid() const + { + return nullptr != iter_ + && nullptr != allocator_; + } + ObNewRowIterator *iter_; + ObIAllocator *allocator_; + ObTextStringIter *str_iter_; + }; +public: + ObHNSWDeserializeCallback() + {} + int operator()(char *&data, const int64_t data_size, int64_t &read_size, share::ObIStreamBuf::CbParam &cb_param); +private: +}; + +class ObHNSWSerializeCallback { +public: + struct CbParam : public ObOStreamBuf::CbParam { + CbParam() + : vctx_(nullptr), allocator_(nullptr), tmp_allocator_(nullptr), tx_desc_(nullptr), snapshot_(nullptr), + timeout_(0), lob_inrow_threshold_(0) + {} + virtual ~CbParam() {} + bool is_valid() const + { + return nullptr != vctx_ + && nullptr != allocator_ + && nullptr != tx_desc_ + && nullptr != snapshot_; + } + void *vctx_; // ObVecIdxSnapshotDataWriteCtx + ObIAllocator *allocator_; + ObIAllocator *tmp_allocator_; + void *tx_desc_; // transaction::ObTxDesc + void *snapshot_; // transaction::ObTxReadSnapshot + int64_t timeout_; + int64_t lob_inrow_threshold_; + }; +public: + ObHNSWSerializeCallback() + {} + int operator()(const char 
*data, const int64_t data_size, share::ObOStreamBuf::CbParam &cb_param); +private: +}; + +class ObVectorIndexSerializer +{ +public: + explicit ObVectorIndexSerializer(ObIAllocator &allocator) + : allocator_(allocator) + {} + + int serialize(void *index, ObOStreamBuf::CbParam &cb_param, ObOStreamBuf::Callback &cb, const int64_t capacity = DEFAULT_OUTBUF_CAPACITY); + int deserialize(void *&index, ObIStreamBuf::CbParam &cb_param, ObIStreamBuf::Callback &cb); +private: + static const int64_t DEFAULT_OUTBUF_CAPACITY = 64LL * 1024LL; // 64KB + +private: + bool is_inited_; + ObIAllocator &allocator_; +}; + +}; +}; +#endif // OCEANBASE_SHARE_PLUGIN_VECTOR_INDEX_SERIALIZE_H_ \ No newline at end of file diff --git a/src/share/vector_index/ob_plugin_vector_index_service.cpp b/src/share/vector_index/ob_plugin_vector_index_service.cpp new file mode 100644 index 0000000000..a3c1943670 --- /dev/null +++ b/src/share/vector_index/ob_plugin_vector_index_service.cpp @@ -0,0 +1,1050 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ +#define USING_LOG_PREFIX SERVER +#include "share/vector_index/ob_plugin_vector_index_service.h" +#include "share/vector_index/ob_plugin_vector_index_utils.h" +#include "share/table/ob_ttl_util.h" +#include "storage/ls/ob_ls.h" +#include "storage/tx_storage/ob_ls_service.h" + +namespace oceanbase +{ +namespace share +{ + +ObPluginVectorIndexMgr::~ObPluginVectorIndexMgr() +{ + destroy(); +} + +void ObPluginVectorIndexMgr::destroy() +{ + if (IS_INIT) { + LOG_INFO("LS Vector Index Mgr destory", K(ls_id_)); + is_inited_ = false; + need_check_ = false; + ls_id_.reset(); + release_all_adapters(); + partial_index_adpt_map_.destroy(); + complete_index_adpt_map_.destroy(); + first_mem_sync_map_.destroy(); + second_mem_sync_map_.destroy(); + // elements memory in adpt map will be released by allocator in service, refine later; + // elements memory in mem_sync_map should be released here, they are alloc by ob_malloc; + // should use 2 allocator to avoid accumulation + task_allocator_.reset(); + } +} + +void ObPluginVectorIndexMgr::release_all_adapters() +{ + int ret = OB_SUCCESS; + WLockGuard lock_guard(adapter_map_rwlock_); + FOREACH(iter, partial_index_adpt_map_) { + const ObTabletID &tablet_id = iter->first; + ObPluginVectorIndexAdaptor *adapter = iter->second; + if (OB_FAIL(ObPluginVectorIndexUtils::release_vector_index_adapter(adapter))) { + LOG_WARN("fail to release vector index adapter", K(tablet_id), KR(ret)); + ret = OB_SUCCESS; // continue release + } + } + FOREACH(iter, complete_index_adpt_map_) { + const ObTabletID &tablet_id = iter->first; + ObPluginVectorIndexAdaptor *adapter = iter->second; + if (OB_FAIL(ObPluginVectorIndexUtils::release_vector_index_adapter(adapter))) { + LOG_WARN("fail to release vector index adapter", K(tablet_id), KR(ret)); + ret = OB_SUCCESS; // continue release + } + } +} + +int ObPluginVectorIndexMgr::init(uint64_t tenant_id, + ObLSID ls_id, + lib::MemoryContext &memory_context, + uint64_t *all_vsag_use_mem) +{ + int ret = 
OB_SUCCESS; + int64_t hash_capacity = common::hash::cal_next_prime(DEFAULT_ADAPTER_HASH_SIZE); + if (OB_FAIL(complete_index_adpt_map_.create(hash_capacity, "VecIdxAdpt"))) { + LOG_WARN("fail to create full index adapter map", K(ls_id), KR(ret)); + } else if (OB_FAIL(partial_index_adpt_map_.create(hash_capacity, "VecIdxAdpt"))) { + LOG_WARN("fail to create partial index adapter map", K(ls_id), KR(ret)); + } else if (OB_FAIL(first_mem_sync_map_.create(hash_capacity, "VecIdxAdpt", "VecIdxAdpt"))) { + LOG_WARN("fail to create first mem sync set", K(ls_id), KR(ret)); + } else if (OB_FAIL(second_mem_sync_map_.create(hash_capacity, "VecIdxAdpt", "VecIdxAdpt"))) { + LOG_WARN("fail to create second mem sync set", K(ls_id), KR(ret)); + } else { + ls_tablet_task_ctx_.task_id_ = 0; + ls_tablet_task_ctx_.non_memdata_task_cycle_ = 0; + ls_tablet_task_ctx_.need_memdata_sync_ = false; + ls_tablet_task_ctx_.state_ = OB_TTL_TASK_PREPARE; + need_check_ = false; + tenant_id_ = tenant_id; + ls_id_ = ls_id; + memory_context_ = memory_context; + all_vsag_use_mem_ = all_vsag_use_mem; + is_inited_ = true; + } + return ret; +} + +int ObPluginVectorIndexMgr::set_complete_adapter_(ObTabletID tablet_id, + ObPluginVectorIndexAdaptor *adapter_inst, + int overwrite) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(complete_index_adpt_map_.set_refactored(tablet_id, adapter_inst, overwrite))) { + LOG_WARN("failed to set complete vector index adapter", K(tablet_id), KR(ret)); + } else { + adapter_inst->inc_ref(); + } + return ret; +} + +int ObPluginVectorIndexMgr::erase_complete_adapter(ObTabletID tablet_id) +{ + int ret = OB_SUCCESS; + ObPluginVectorIndexAdaptor *adapter_inst = nullptr; + if (OB_FAIL(complete_index_adpt_map_.erase_refactored(tablet_id, &adapter_inst))) { + if (ret != OB_HASH_NOT_EXIST) { + LOG_WARN("failed to erase partial vector index adapter", K(tablet_id), KR(ret)); + } + } else if (OB_ISNULL(adapter_inst)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("adapter inst is null", K(tablet_id), 
KR(ret)); + } else { + if (OB_FAIL(ObPluginVectorIndexUtils::release_vector_index_adapter(adapter_inst))) { + LOG_WARN("fail to release vector index adapter", K(tablet_id), KR(ret)); + } + } + return ret; +} + +int ObPluginVectorIndexMgr::set_partial_adapter_(ObTabletID tablet_id, + ObPluginVectorIndexAdaptor *adapter_inst, + int overwrite) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(partial_index_adpt_map_.set_refactored(tablet_id, adapter_inst, overwrite))) { + LOG_WARN("failed to set partial vector index adapter", K(tablet_id), KR(ret)); + } else { + adapter_inst->inc_ref(); + } + return ret; +} + +int ObPluginVectorIndexMgr::erase_partial_adapter_(ObTabletID tablet_id) +{ + int ret = OB_SUCCESS; + ObPluginVectorIndexAdaptor *adapter_inst = nullptr; + if (OB_FAIL(partial_index_adpt_map_.erase_refactored(tablet_id, &adapter_inst))) { + LOG_WARN("failed to erase partial vector index adapter", K(tablet_id), KR(ret)); + } else if (OB_ISNULL(adapter_inst)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("adapter inst is null", K(tablet_id), KR(ret)); + } else { + if (OB_FAIL(ObPluginVectorIndexUtils::release_vector_index_adapter(adapter_inst))) { + LOG_WARN("fail to release vector index adapter", K(tablet_id), KR(ret)); + } + } + return ret; +} + +int ObPluginVectorIndexMgr::erase_partial_adapter(ObTabletID tablet_id) +{ + return erase_partial_adapter_(tablet_id); +} + +int ObPluginVectorIndexMgr::get_adapter_inst_guard(ObTabletID tablet_id, ObPluginVectorIndexAdapterGuard &adpt_guard) +{ + int ret = OB_SUCCESS; + RLockGuard lock_guard(adapter_map_rwlock_); + + ObPluginVectorIndexAdaptor *index_inst = nullptr; + if (OB_FAIL(get_adapter_inst_(tablet_id, index_inst))) { + LOG_WARN("failed to get adapter inst", K(tablet_id), KR(ret)); + } else if (OB_FAIL(adpt_guard.set_adapter(index_inst))) { + LOG_WARN("failed to set adapter", K(tablet_id), KR(ret)); + } + return ret; +} + +int ObPluginVectorIndexMgr::get_adapter_inst_(ObTabletID tablet_id, ObPluginVectorIndexAdaptor 
*&index_inst) +{ + int ret = OB_SUCCESS; + index_inst = nullptr; + + if (OB_FAIL(partial_index_adpt_map_.get_refactored(tablet_id, index_inst))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get temp vector index inst", K(tablet_id), KR(ret)); + } else { + ret = OB_SUCCESS; // not in partial adapter, try to get complete adapter + } + } else if (OB_ISNULL(index_inst)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null temp vector index inst", K(tablet_id), KR(ret)); + } + + if (OB_FAIL(ret) || OB_NOT_NULL(index_inst)) { + } else if (OB_FAIL(complete_index_adpt_map_.get_refactored(tablet_id, index_inst))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get full vector index inst", K(tablet_id), KR(ret)); + } else { + // ret is OB_HASH_NOT_EXIST not found, + } + } else if (OB_ISNULL(index_inst)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null full vector index inst", K(tablet_id), KR(ret)); + } + + return ret; +} + +int ObPluginVectorIndexMgr::create_partial_adapter(ObTabletID idx_tablet_id, + ObTabletID data_tablet_id, + ObIndexType type, + ObIAllocator &allocator, + int64_t index_table_id, + ObString *vec_index_param, + int64_t dim) +{ + int ret = OB_SUCCESS; + ObPluginVectorIndexAdaptor *tmp_vec_idx_adpt = nullptr; + + void *adpt_buff = allocator.alloc(sizeof(ObPluginVectorIndexAdaptor)); + if (OB_ISNULL(adpt_buff)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory for vector index adapter", KR(ret)); + } else { + tmp_vec_idx_adpt = new(adpt_buff)ObPluginVectorIndexAdaptor(&allocator, memory_context_); + ObVectorIndexRecordType record_type = ObPluginVectorIndexUtils::index_type_to_record_type(type); + if (record_type >= VIRT_MAX) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid index type", K(type), KR(ret)); + // always init after construct + } else if ((OB_ISNULL(vec_index_param) || vec_index_param->empty()) + && OB_FAIL(tmp_vec_idx_adpt->init(memory_context_, all_vsag_use_mem_))) { + LOG_WARN("failed to init 
adpt.", K(ret)); + // need to handle dim and type + } else if ((OB_NOT_NULL(vec_index_param) && !vec_index_param->empty()) + && OB_FAIL(tmp_vec_idx_adpt->init(*vec_index_param, dim, memory_context_, all_vsag_use_mem_))) { + LOG_WARN("failed to init adpt.", K(ret), K(*vec_index_param), K(dim)); + } else if (OB_FAIL(tmp_vec_idx_adpt->set_tablet_id(record_type, idx_tablet_id))) { + LOG_WARN("failed to set tablet id", K(idx_tablet_id), K(type), KR(ret)); + } else if (data_tablet_id.is_valid() // tmp adapter may not have data_tablet id + && OB_FAIL(tmp_vec_idx_adpt->set_tablet_id(VIRT_DATA, data_tablet_id))) { + LOG_WARN("failed to set data tablet id", K(idx_tablet_id), K(type), K(data_tablet_id), KR(ret)); + } else if (OB_FAIL(tmp_vec_idx_adpt->set_table_id(record_type, index_table_id))) { + LOG_WARN("failed to set index table id", K(idx_tablet_id), K(type), K(index_table_id), KR(ret)); + } else { + tmp_vec_idx_adpt->set_create_type(ObPluginVectorIndexUtils::index_type_to_create_type(type)); + } + if (OB_SUCC(ret)) { + WLockGuard lock_guard(adapter_map_rwlock_); + if (OB_FAIL(set_partial_adapter_(idx_tablet_id, tmp_vec_idx_adpt))) { + LOG_WARN("set vector index adapter faild", K(idx_tablet_id), KR(ret)); + } // other thread set already, need get again ? 
+ } + if (OB_FAIL(ret) && OB_NOT_NULL(tmp_vec_idx_adpt)) { + tmp_vec_idx_adpt->~ObPluginVectorIndexAdaptor(); + allocator.free(adpt_buff); + tmp_vec_idx_adpt = nullptr; + adpt_buff = nullptr; + } + } + + return ret; +} + +int ObPluginVectorIndexMgr::get_or_create_partial_adapter_(ObTabletID tablet_id, + ObIndexType type, + ObPluginVectorIndexAdapterGuard &adapter_guard, + ObString *vec_index_param, + int64_t dim, + ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(get_adapter_inst_guard(tablet_id, adapter_guard))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get vector index adapter", K(tablet_id), KR(ret)); + } else { // not exist create new + if (OB_FAIL(create_partial_adapter(tablet_id, ObTabletID(), type, allocator, OB_INVALID_ID, vec_index_param, dim))) { + LOG_WARN("failed to create tmp vector index instance with ls", K(tablet_id), K(type), KR(ret)); + } else if (OB_FAIL(get_adapter_inst_guard(tablet_id, adapter_guard))) { + LOG_WARN("failed to get tmp vector index instance with ls", K(tablet_id), K(type), KR(ret)); + } else { + LOG_INFO("create partial index adapter success", K(ret), KPC(adapter_guard.get_adatper())); + } + } + } + return ret; +} + +int ObPluginVectorIndexMgr::get_adapter_inst_by_ctx(ObVectorIndexAcquireCtx &ctx, + bool &need_merge, + ObIAllocator &allocator, + ObPluginVectorIndexAdapterGuard &adapter_guard, + ObVectorIndexAdapterCandiate &candidate, + ObString *vec_index_param, + int64_t dim) +{ + int ret = OB_SUCCESS; + need_merge = true; + + if (!ctx.inc_tablet_id_.is_valid() + || !ctx.snapshot_tablet_id_.is_valid() + || !ctx.vbitmap_tablet_id_.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ctx), KR(ret)); + } else { + ObPluginVectorIndexAdaptor *adapter = nullptr; + // fast return if get complete adapter + if (OB_FAIL(get_or_create_partial_adapter_(ctx.inc_tablet_id_, + INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL, + candidate.inc_adatper_guard_, + vec_index_param, + dim, + allocator))) { + 
LOG_WARN("failed to get vector index adapter", K(ctx.inc_tablet_id_), KR(ret)); + } else if (FALSE_IT(adapter = candidate.inc_adatper_guard_.get_adatper())) { + } else if (adapter->get_create_type() == CreateTypeFullPartial + || adapter->get_create_type() == CreateTypeComplete) { + if (OB_FAIL(adapter_guard.set_adapter(adapter))) { + LOG_WARN("failed to set adapter", K(adapter_guard), KR(ret)); + } else { + need_merge = false; + } + } + + if (OB_FAIL(ret) || need_merge == false) { + // do nothing + } else if (OB_FAIL(get_or_create_partial_adapter_(ctx.vbitmap_tablet_id_, + INDEX_TYPE_VEC_INDEX_ID_LOCAL, + candidate.bitmp_adatper_guard_, + vec_index_param, + dim, + allocator))) { + LOG_WARN("failed to get vector index adapter", K(ctx.vbitmap_tablet_id_), KR(ret)); + } else if (FALSE_IT(adapter = candidate.bitmp_adatper_guard_.get_adatper())) { + } else if (adapter->get_create_type() == CreateTypeFullPartial + || adapter->get_create_type() == CreateTypeComplete) { + if (OB_FAIL(adapter_guard.set_adapter(adapter))) { + LOG_WARN("failed to set adapter", K(adapter_guard), KR(ret)); + } else { + need_merge = false; + } + } + + if (OB_FAIL(ret) || need_merge == false) { + // do nothing + } else if (OB_FAIL(get_or_create_partial_adapter_(ctx.snapshot_tablet_id_, + INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL, + candidate.sn_adatper_guard_, + vec_index_param, + dim, + allocator))) { + LOG_WARN("failed to get vector index adapter", K(ctx.snapshot_tablet_id_), KR(ret)); + } else if (FALSE_IT(adapter = candidate.sn_adatper_guard_.get_adatper())) { + } else if (adapter->get_create_type() == CreateTypeFullPartial + || adapter->get_create_type() == CreateTypeComplete) { + if (OB_FAIL(adapter_guard.set_adapter(adapter))) { + LOG_WARN("failed to set adapter", K(adapter_guard), KR(ret)); + } else { + need_merge = false; + } + } + } + return ret; +} + +int ObPluginVectorIndexMgr::get_and_merge_adapter(ObVectorIndexAcquireCtx &ctx, + ObIAllocator &allocator, + 
ObPluginVectorIndexAdapterGuard &adapter_guard, + ObString *vec_index_param, + int64_t dim) +{ + int ret = OB_SUCCESS; + bool need_merge = false; + ObVectorIndexAdapterCandiate candidate; + if (OB_FAIL(get_adapter_inst_by_ctx(ctx, need_merge, allocator, adapter_guard, + candidate, vec_index_param, dim))) { + LOG_WARN("failed to get and merge adapter", K(ls_id_), K(ctx), KR(ret)); + } + if (OB_SUCC(ret) + && need_merge + && OB_FAIL(replace_with_full_partial_adapter(ctx, allocator, adapter_guard, + vec_index_param, dim, &candidate))) { + LOG_WARN("failed to replace with full partial adapter", K(ctx), KR(ret)); + } + + return ret; +} + +int ObPluginVectorIndexMgr::check_need_mem_data_sync_task(bool &need_sync) +{ + need_sync = false; + if (get_processing_map().size() > 0) { + if (ls_tablet_task_ctx_.all_finished_) { // is false + get_processing_map().reuse(); + // release task ctx + ls_tablet_task_ctx_.all_finished_ = false; + LOG_INFO("release processing set to waiting set", + K(ls_id_), + K(processing_first_mem_sync_), + K(get_processing_map().size()), + K(get_waiting_map().size()), + K(ls_tablet_task_ctx_)); + } else { + need_sync = true; // continue sync current processing set + LOG_INFO("continue processing set to waiting set", + K(ls_id_), + K(processing_first_mem_sync_), + K(get_processing_map().size()), + K(get_waiting_map().size()), + K(ls_tablet_task_ctx_)); + } + } + if (!need_sync && get_waiting_map().size() > 0) { + // procession_set is empty, wating list not empty + need_sync = true; + switch_processing_map(); + LOG_INFO("switch processing set to waiting set", + K(ls_id_), + K(processing_first_mem_sync_), + K(get_processing_map().size()), + K(get_waiting_map().size()), + K(ls_tablet_task_ctx_)); + } + // both map empty, do nothing + return OB_SUCCESS; +} + +int ObPluginVectorIndexService::acquire_adapter_guard(ObLSID ls_id, + ObVectorIndexAcquireCtx &ctx, + ObPluginVectorIndexAdapterGuard &adapter_guard, + ObString *vec_index_param, + int64_t dim) +{ + 
int ret = OB_SUCCESS; + ObPluginVectorIndexMgr *ls_index_mgr = nullptr; + + if (OB_FAIL(acquire_vector_index_mgr(ls_id, ls_index_mgr))) { + LOG_WARN("failed to acquire vector index mgr", K(ls_id), KR(ret)); + } else if (OB_FAIL(ls_index_mgr->get_and_merge_adapter(ctx, allocator_, adapter_guard, + vec_index_param, dim))) { + LOG_WARN("failed to get and merge adapter", K(ls_id), K(ctx), KR(ret)); + } + + return ret; +} + +int ObPluginVectorIndexService::acquire_adapter_guard(ObLSID ls_id, + ObTabletID tablet_id, + ObIndexType type, + ObPluginVectorIndexAdapterGuard &adapter_guard, + ObString *vec_index_param, + int64_t dim) +{ + int ret = OB_SUCCESS; + ObPluginVectorIndexMgr *ls_index_mgr = nullptr; + if (OB_FAIL(get_ls_index_mgr_map().get_refactored(ls_id, ls_index_mgr))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get vector index mgr for ls", K(ls_id), KR(ret)); + } else { // create new ls index mgr if not exist + ret = OB_SUCCESS; + if (OB_FAIL(create_partial_adapter(ls_id, tablet_id, ObTabletID(), type, OB_INVALID_ID, vec_index_param, dim))) { + LOG_WARN("failed to create tmp vector index instance", K(ls_id), K(tablet_id), K(type), KR(ret)); + } else if (OB_FAIL(get_adapter_inst_guard(ls_id, tablet_id, adapter_guard))) { + LOG_WARN("failed to get tmp vector index instance", K(ls_id), K(tablet_id), K(type), KR(ret)); + } else { + LOG_INFO("create partial index adapter success", K(ret), K(ls_id), KPC(adapter_guard.get_adatper())); + } + } + } else if (OB_ISNULL(ls_index_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null vector index mgr for ls", K(ls_id), KR(ret)); + } else if (OB_FAIL(ls_index_mgr->get_adapter_inst_guard(tablet_id, adapter_guard))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get vector index adapter", K(ls_id), K(tablet_id), KR(ret)); + } else { // not exist create new + if (OB_FAIL(ls_index_mgr->create_partial_adapter(tablet_id, ObTabletID(), type, allocator_, OB_INVALID_ID, vec_index_param, dim))) { + 
LOG_WARN("failed to create tmp vector index instance with ls", K(ls_id), K(tablet_id), K(type), KR(ret)); + } else if (OB_FAIL(ls_index_mgr->get_adapter_inst_guard(tablet_id, adapter_guard))) { + LOG_WARN("failed to get tmp vector index instance with ls", K(ls_id), K(tablet_id), K(type), KR(ret)); + } else { + LOG_INFO("create partial index adapter success", K(ret), K(ls_id), KPC(adapter_guard.get_adatper())); + } + } + } else { + // get from existed ls index mgr + } + if (OB_SUCC(ret) + && OB_NOT_NULL(adapter_guard.get_adatper()) + && adapter_guard.get_adatper()->get_index_type() >= ObVectorIndexAlgorithmType::VIAT_MAX) { + // check index param, if it is emtpy, may get partial adapter during maintenance + if (OB_NOT_NULL(vec_index_param) + && !vec_index_param->empty() + && OB_FAIL(adapter_guard.get_adatper()->set_param(*vec_index_param, dim))) { + LOG_WARN("failed to set param", K(ret), K(ls_id), K(tablet_id), K(type), KPC(vec_index_param), K(dim)); + } + LOG_INFO("may get get partial adapter during maintenance", KPC(adapter_guard.get_adatper())); + } + + return ret; +} + +int ObPluginVectorIndexMgr::check_and_merge_partial_inner(ObVecIdxSharedTableInfoMap &info_map, + ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + typedef common::hash::ObHashMap VectorIndexIdentityMap; + VectorIndexIdentityMap data_tablet_id_map; + ObArenaAllocator tmp_allocator("VectorAdptCandi", OB_MALLOC_NORMAL_BLOCK_SIZE, tenant_id_); + if (OB_FAIL(data_tablet_id_map.create(DEFAULT_CANDIDATE_ADAPTER_HASH_SIZE, "VecIdxDataTID"))) { + LOG_WARN("fail to create hash map for data tablet id to vec index adapter", KR(ret)); + } else { + // build candidate and save to data_tablet_id_map + // query process may merge adapters and delete partial adapters from hashmap, + // use lock here to avoid merge race condition for simple + RLockGuard lock_guard(adapter_map_rwlock_); + + FOREACH_X(adpt_lt, get_partial_adapter_map(), OB_SUCC(ret)) { + ObTabletID index_tablet_id = adpt_lt->first; + 
ObPluginVectorIndexAdaptor *partial_adpt = adpt_lt->second; + ObTabletID data_tablet_id = partial_adpt->get_data_tablet_id(); + ObVectorIndexAdapterCandiate *candidate = nullptr; + char *buff = nullptr; + ObPluginVectorIndexIdentity index_identity(data_tablet_id, partial_adpt->get_index_identity()); + if (!index_identity.is_valid()) { + // skip, wait for next round + } else { + if (OB_FAIL(data_tablet_id_map.get_refactored(index_identity, candidate))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get candidate index adapter", K(index_identity), KR(ret)); + } else { + buff = static_cast(tmp_allocator.alloc(sizeof(ObVectorIndexAdapterCandiate))); + if (OB_ISNULL(buff)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory for vector index adapter", KR(ret)); + } else { + candidate = new(buff)ObVectorIndexAdapterCandiate(); + if (OB_FAIL(data_tablet_id_map.set_refactored(index_identity, candidate))) { + LOG_WARN("failed to set candidate index adapter", K(index_identity), KR(ret)); + } + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(candidate)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid candidate index adapter", KR(ret)); + } else { + if (index_tablet_id == partial_adpt->get_inc_tablet_id()) { + if (candidate->inc_adatper_guard_.is_valid()) { // conflict maybe during rebuild + candidate->is_valid_ = false; + } else { + candidate->inc_adatper_guard_.set_adapter(partial_adpt); + } + } else if (index_tablet_id == partial_adpt->get_vbitmap_tablet_id()) { + if (candidate->bitmp_adatper_guard_.is_valid()) { // conflict maybe during rebuild + candidate->is_valid_ = false; + } else { + candidate->bitmp_adatper_guard_.set_adapter(partial_adpt); + } + } else if (index_tablet_id == partial_adpt->get_snap_tablet_id()) { + if (candidate->sn_adatper_guard_.is_valid()) { // conflict maybe during rebuild + candidate->is_valid_ = false; + } else { + candidate->sn_adatper_guard_.set_adapter(partial_adpt); + } + } + } + } + } + } + + 
ret = OB_SUCCESS; // continue handle valid candidates + FOREACH_X(candidate_adpt_lt, data_tablet_id_map, OB_SUCC(ret)) { + ObVectorIndexAdapterCandiate *candidate = candidate_adpt_lt->second; + if (OB_ISNULL(candidate)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid candidate index adapter", KR(ret), K(candidate_adpt_lt->first)); + } else if (candidate->is_valid_ == false || (!candidate->is_complete())) { + // do nothing + } else if (OB_FAIL(replace_with_complete_adapter(candidate, info_map, allocator))) { + LOG_WARN("failed to replace adapter", KR(ret), K(candidate_adpt_lt->first)); + } + } + // do clean up + FOREACH(candidate_adpt_lt, data_tablet_id_map) { + ObVectorIndexAdapterCandiate *candidate = candidate_adpt_lt->second; + if (OB_ISNULL(candidate)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid candidate index adapter", KR(ret)); + } else { + candidate->~ObVectorIndexAdapterCandiate(); + } + } + + data_tablet_id_map.reuse(); + tmp_allocator.reset(); + + return ret; +} + +int ObPluginVectorIndexService::check_and_merge_adapter(ObLSID ls_id, ObVecIdxSharedTableInfoMap &info_map) +{ + int ret = OB_SUCCESS; + ObPluginVectorIndexMgr *index_ls_mgr = nullptr; + if (OB_FAIL(get_ls_index_mgr_map().get_refactored(ls_id, index_ls_mgr))) { + if (OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("fail to get vector index ls mgr", KR(ret), K(tenant_id_), K(ls_id)); + } + } else if (OB_ISNULL(index_ls_mgr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid vector index ls mgr", KR(ret), K(tenant_id_), K(ls_id)); + } else if (!index_ls_mgr->get_partial_adapter_map().empty()) { + if (OB_FAIL(index_ls_mgr->check_and_merge_partial_inner(info_map, allocator_))) { + LOG_WARN("failed to check and merge partial adapter", KR(ret)); + } + + } + + return ret; +} + +int ObPluginVectorIndexService::get_adapter_inst_guard(ObLSID ls_id, + ObTabletID tablet_id, + ObPluginVectorIndexAdapterGuard &adpt_guard) +{ + int ret = OB_SUCCESS; + 
ObPluginVectorIndexMgr *ls_index_mgr = nullptr;
+  if (OB_FAIL(get_ls_index_mgr_map().get_refactored(ls_id, ls_index_mgr))) {
+    LOG_WARN("failed to get vector index mgr for ls", K(ls_id), KR(ret));
+  } else if (OB_ISNULL(ls_index_mgr)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get null vector index mgr for ls", K(ls_id), KR(ret));
+  } else if (OB_FAIL(ls_index_mgr->get_adapter_inst_guard(tablet_id, adpt_guard))) {
+    LOG_WARN("failed to get vector index adapter", K(ls_id), K(tablet_id), KR(ret));
+  }
+  return ret;
+}
+
+// Look up the per-LS index manager for `ls_id`, creating and registering one when the
+// map has no entry yet. On success `mgr` is the instance stored in the map.
+// A lookup failure other than OB_HASH_NOT_EXIST is returned to the caller unchanged.
+int ObPluginVectorIndexService::acquire_vector_index_mgr(ObLSID ls_id, ObPluginVectorIndexMgr *&mgr)
+{
+  int ret = OB_SUCCESS;
+  if (OB_FAIL(get_ls_index_mgr_map().get_refactored(ls_id, mgr))) {
+    if (OB_HASH_NOT_EXIST == ret) {
+      void *mgr_buff = allocator_.alloc(sizeof(ObPluginVectorIndexMgr));
+      if (OB_ISNULL(mgr_buff)) {
+        ret = OB_ALLOCATE_MEMORY_FAILED;
+        LOG_WARN("failed to allocate memeory for new vector index mgr", KR(ret));
+      } else {
+        ObPluginVectorIndexMgr *new_ls_index_mgr = new(mgr_buff)ObPluginVectorIndexMgr(memory_context_);
+        if (OB_FAIL(new_ls_index_mgr->init(tenant_id_, ls_id, memory_context_, &all_vsag_use_mem_))) {
+          LOG_WARN("failed to init ls vector index mgr", K(ls_id), KR(ret));
+        } else if (OB_FAIL(get_ls_index_mgr_map().set_refactored(ls_id, new_ls_index_mgr))) {
+          LOG_WARN("set vector index mgr map faild", K(ls_id), KR(ret));
+        }
+        if (OB_FAIL(ret)) {
+          // the new manager never became visible through the map, so tear it down here
+          new_ls_index_mgr->~ObPluginVectorIndexMgr();
+          allocator_.free(mgr_buff);
+          new_ls_index_mgr = nullptr;
+          mgr_buff = nullptr;
+        } else if (OB_FAIL(get_ls_index_mgr_map().get_refactored(ls_id, mgr))) {
+          LOG_WARN("failed to get vector index mgr for ls", K(ls_id), KR(ret));
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+// Create a partial adapter for `idx_tablet_id` inside the manager of `ls_id`; the
+// manager itself is created on demand through acquire_vector_index_mgr().
+int ObPluginVectorIndexService::create_partial_adapter(ObLSID ls_id,
+                                                       ObTabletID idx_tablet_id,
+                                                       ObTabletID data_tablet_id,
+                                                       ObIndexType type,
+                                                       int64_t index_table_id,
+                                                       ObString *vec_index_param,
+                                                       int64_t dim)
+{
+  int ret = OB_SUCCESS;
+  ObPluginVectorIndexMgr *ls_index_mgr = nullptr;
+
+  if (OB_FAIL(acquire_vector_index_mgr(ls_id, ls_index_mgr))) {
+    if (OB_HASH_NOT_EXIST != ret) {
+      LOG_WARN("failed to get vector index mgr for ls", K(ls_id), KR(ret));
+    }
+  }
+
+  if (OB_FAIL(ret)) {
+  } else if (OB_ISNULL(ls_index_mgr)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get null vector index mgr for ls", K(ls_id), KR(ret));
+  } else if (OB_FAIL(ls_index_mgr->create_partial_adapter(idx_tablet_id,
+                                                          data_tablet_id,
+                                                          type,
+                                                          allocator_,
+                                                          index_table_id,
+                                                          vec_index_param,
+                                                          dim))) {
+    LOG_WARN("set vector index adapter faild", K(ls_id), K(idx_tablet_id), KR(ret));
+  }
+
+  return ret;
+}
+
+ObPluginVectorIndexService::~ObPluginVectorIndexService()
+{
+  destroy();
+}
+
+// Release every per-LS manager, the manager map, both allocators and the memory
+// context. Does nothing unless init() completed successfully.
+void ObPluginVectorIndexService::destroy()
+{
+  if (IS_INIT) {
+    LOG_INFO("destroy vector index load task timer", K_(tenant_id));
+    is_inited_ = false;
+    has_start_ = false;
+    tenant_id_ = OB_INVALID_TENANT_ID;
+    is_ls_or_tablet_changed_ = false;
+    schema_service_ = NULL;
+    ls_service_ = NULL;
+
+    FOREACH(iter, index_ls_mgr_map_) {
+      ObPluginVectorIndexMgr *ls_index_mgr = iter->second;
+      if (OB_NOT_NULL(ls_index_mgr)) {
+        // NOTE(review): acquire_vector_index_mgr() runs ~ObPluginVectorIndexMgr() before
+        // freeing, this path only calls destroy(); confirm destroy() releases everything
+        // the manager's members own.
+        ls_index_mgr->destroy();
+        allocator_.free(ls_index_mgr);
+        ls_index_mgr = nullptr;
+      }
+    }
+    index_ls_mgr_map_.destroy();
+    allocator_.reset();
+    if (memory_context_ != nullptr) {
+      DESTROY_CONTEXT(memory_context_);
+      memory_context_ = nullptr;
+    }
+    alloc_.reset();
+  }
+}
+
+// Create the LS manager map, the allocators and the memory context labelled
+// "VectorIndexVsag", then record the tenant and the service pointers.
+// Returns OB_INIT_TWICE when already initialized and OB_INVALID_ARGUMENT for an
+// invalid tenant id or a null service pointer.
+int ObPluginVectorIndexService::init(const uint64_t tenant_id,
+                                     schema::ObMultiVersionSchemaService *schema_service,
+                                     ObLSService *ls_service)
+{
+  int ret = OB_SUCCESS;
+  lib::ObMemAttr mem_attr(MTL_ID(), "VecIdxSrv");
+  if (IS_INIT) {
+    ret = OB_INIT_TWICE;
+    LOG_WARN("init twice", KR(ret), K(tenant_id));
+  } else if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)
+             || OB_ISNULL(schema_service)
+             || OB_ISNULL(ls_service)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument to init ObPluginVectorIndexService", KR(ret), K(tenant_id));
+  } else if (OB_FAIL(index_ls_mgr_map_.create(common::hash::cal_next_prime(DEFAULT_LS_HASH_SIZE), "VecIdxLSMgr"))) {
+    LOG_WARN("create ls mgr ", KR(ret), K(tenant_id));
+  } else if (FALSE_IT(alloc_.set_tenant_id(MTL_ID()))) {
+  } else if (OB_FAIL(allocator_.init(&alloc_, OB_MALLOC_MIDDLE_BLOCK_SIZE, mem_attr))) {
+    // message previously named an unrelated module ("ObTenantSrs")
+    LOG_WARN("vector index service allocator init failed.", K(ret));
+  } else {
+    lib::ContextParam param;
+    param.set_mem_attr(MTL_ID())
+      .set_properties(lib::ADD_CHILD_THREAD_SAFE | lib::ALLOC_THREAD_SAFE | lib::RETURN_MALLOC_DEFAULT)
+      .set_page_size(OB_MALLOC_MIDDLE_BLOCK_SIZE)
+      .set_label("VectorIndexVsag")
+      .set_ablock_size(lib::INTACT_MIDDLE_AOBJECT_SIZE);
+    if (OB_FAIL(ROOT_CONTEXT->CREATE_CONTEXT(memory_context_, param))) {
+      LOG_WARN("create memory entity failed", K(ret));
+    } else {
+      tenant_id_ = tenant_id;
+      schema_service_ = schema_service;
+      ls_service_ = ls_service;
+      is_inited_ = true;
+      LOG_INFO("plugin vector index service: init", KR(ret), K_(tenant_id));
+    }
+  }
+  // NOTE(review): when a later step fails, what earlier steps created is only released
+  // by destroy(), which is a no-op while is_inited_ is false.
+  return ret;
+}
+
+// MTL entry point: initialize `service` with the current tenant id, the global schema
+// service and the tenant's ObLSService.
+int ObPluginVectorIndexService::mtl_init(ObPluginVectorIndexService *&service)
+{
+  int ret = OB_SUCCESS;
+  schema::ObMultiVersionSchemaService *schema_service = &GSCHEMASERVICE;
+  ObLSService *ls_service = MTL(ObLSService*);
+
+  if (OB_FAIL(service->init(MTL_ID(), schema_service, ls_service))) {
+    LOG_WARN("fail to init plugin vector index service service", KR(ret));
+  }
+  return ret;
+}
+
+// Only verifies that init() has run; nothing is started here.
+int ObPluginVectorIndexService::start()
+{
+  int ret = OB_SUCCESS;
+  if (IS_NOT_INIT) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("ObPluginVectorIndexService is not inited", KR(ret), K_(tenant_id));
+  }
+  return ret;
+}
+
+// Logs only.
+void ObPluginVectorIndexService::stop()
+{
+  if (IS_INIT) {
+    LOG_INFO("stop vector index service", K_(tenant_id), K_(is_inited));
+  }
+}
+
+// Logs only.
+void ObPluginVectorIndexService::wait()
+{
+  if (IS_INIT) {
+    LOG_INFO("wait vector index service", K_(tenant_id));
+  }
+}
+
+// ToDo: debug functions, remove later virtual-table ready
+// Logs every entry of the partial and the complete adapter map under the read lock.
+void ObPluginVectorIndexMgr::dump_all_inst()
+{
+  int ret = OB_SUCCESS;
+  RLockGuard lock_guard(adapter_map_rwlock_);
+  FOREACH(iter, partial_index_adpt_map_) {
+    const ObTabletID &tablet_id = iter->first;
+    ObPluginVectorIndexAdaptor *adapter = iter->second;
+    ObVectorIndexHNSWParam *hnsw_param = (adapter == nullptr)? nullptr : (ObVectorIndexHNSWParam *)(adapter->get_algo_data());
+    LOG_INFO("dump partial index adapter", K(ls_id_), K(tablet_id), KP(adapter), KPC(adapter), KPC(hnsw_param));
+  }
+  FOREACH(iter, complete_index_adpt_map_) {
+    const ObTabletID &tablet_id = iter->first;
+    ObPluginVectorIndexAdaptor *adapter = iter->second;
+    ObVectorIndexHNSWParam *hnsw_param = (adapter == nullptr)? nullptr : (ObVectorIndexHNSWParam *)(adapter->get_algo_data());
+    LOG_INFO("dump complete index adapter", K(ls_id_), K(tablet_id), KP(adapter), KPC(adapter), KPC(hnsw_param));
+  }
+}
+
+// Append one (ls_id_, tablet_id) pair per key of the partial / complete adapter map to
+// the matching output array, under the read lock. Stops at the first push_back failure.
+int ObPluginVectorIndexMgr::get_snapshot_tablet_ids(
+    ObIArray<ObLSTabletPair> &complete_tablet_ids,
+    ObIArray<ObLSTabletPair> &partial_tablet_ids)
+{
+  int ret = OB_SUCCESS;
+  ObLSTabletPair pair;
+  RLockGuard lock_guard(adapter_map_rwlock_);
+  FOREACH_X(iter, partial_index_adpt_map_, OB_SUCC(ret)) {
+    const ObTabletID &tablet_id = iter->first;
+    pair.ls_id_ = ls_id_;
+    pair.tablet_id_ = tablet_id;
+    if (OB_FAIL(partial_tablet_ids.push_back(pair))) {
+      LOG_WARN("failed to push array", K(ret));
+    }
+  }
+  FOREACH_X(iter, complete_index_adpt_map_, OB_SUCC(ret)) {
+    const ObTabletID &tablet_id = iter->first;
+    pair.ls_id_ = ls_id_;
+    pair.tablet_id_ = tablet_id;
+    if (OB_FAIL(complete_tablet_ids.push_back(pair))) {
+      LOG_WARN("failed to push array", K(ret));
+    }
+  }
+  return ret;
+}
+
+// Collect the tablet ids of every LS manager registered in this service.
+int ObPluginVectorIndexService::get_snapshot_ids(
+    ObIArray<ObLSTabletPair> &complete_tablet_ids,
+    ObIArray<ObLSTabletPair> &partial_tablet_ids)
+{
+  int ret = OB_SUCCESS;
+  FOREACH_X(iter, index_ls_mgr_map_, OB_SUCC(ret)) {
+    const ObLSID &ls_id = iter->first;
+    ObPluginVectorIndexMgr *index_ls_mgr = iter->second;
+    if (OB_ISNULL(index_ls_mgr)) {
+      // the other accessors of this map treat a null entry as unexpected as well
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("get null vector index mgr for ls", K(ls_id), KR(ret));
+    } else if (OB_FAIL(index_ls_mgr->get_snapshot_tablet_ids(complete_tablet_ids, partial_tablet_ids))) {
+      LOG_WARN("failed to get snapshot tablet ids", K(ls_id), K(ret));
+    }
+  }
+  return ret;
+}
+
+// for complete
+// Merge the three partial adapters held by `candidate` into one newly allocated adapter
+// of type CreateTypeComplete, register it in the complete map under its inc, vbitmap
+// and snapshot tablet ids, then drop those ids from the partial map.
+// When a registration fails the earlier registrations are rolled back and the error of
+// the failed registration is returned.
+int ObPluginVectorIndexMgr::replace_with_complete_adapter(ObVectorIndexAdapterCandiate *candidate,
+                                                          ObVecIdxSharedTableInfoMap &info_map,
+                                                          ObIAllocator &allocator)
+{
+  int ret = OB_SUCCESS;
+  int tmp_ret = OB_SUCCESS; // result of rollback steps, must not overwrite ret
+  ObPluginVectorIndexAdapterGuard &inc_adapter_guard = candidate->inc_adatper_guard_;
+  ObPluginVectorIndexAdapterGuard &bitmap_adapter_guard = candidate->bitmp_adatper_guard_;
+  ObPluginVectorIndexAdapterGuard &sn_adapter_guard = candidate->sn_adatper_guard_;
+  // create new adapter
+  ObPluginVectorIndexAdaptor *new_adapter = nullptr;
+  bool set_success = false;
+  void *adpt_buff = allocator.alloc(sizeof(ObPluginVectorIndexAdaptor));
+  if (OB_ISNULL(adpt_buff)) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("failed to allocate memory for vector index adapter", KR(ret));
+  } else {
+    new_adapter = new(adpt_buff)ObPluginVectorIndexAdaptor(&allocator, memory_context_);
+    new_adapter->set_create_type(CreateTypeComplete);
+    if (OB_FAIL(new_adapter->merge_parital_index_adapter(inc_adapter_guard.get_adatper()))) {
+      LOG_WARN("failed to merge inc index adapter", KPC(inc_adapter_guard.get_adatper()), KR(ret));
+    } else if (OB_FAIL(new_adapter->merge_parital_index_adapter(bitmap_adapter_guard.get_adatper()))) {
+      LOG_WARN("failed to merge bitmap index adapter", KPC(bitmap_adapter_guard.get_adatper()), KR(ret));
+    } else if (OB_FAIL(new_adapter->merge_parital_index_adapter(sn_adapter_guard.get_adatper()))) {
+      LOG_WARN("failed to merge snapshot index adapter", KPC(sn_adapter_guard.get_adatper()), KR(ret));
+    // still call init to avoid not all 3 part of partial adapter called before merge
+    } else if (OB_FAIL(new_adapter->init(memory_context_, all_vsag_use_mem_))) {
+      LOG_WARN("failed to init adpt.", K(ret));
+    } else if (!new_adapter->is_vid_rowkey_info_valid()) {
+      ObVectorIndexSharedTableInfo info;
+      if (OB_FAIL(info_map.get_refactored(new_adapter->get_data_tablet_id(), info))) {
+        LOG_WARN("failed to get vector index shared table info",
+                 K(new_adapter->get_data_tablet_id()), KR(ret));
+      } else {
+        new_adapter->set_vid_rowkey_info(info);
+      }
+    }
+    if (OB_FAIL(ret)) {
+    } else {
+      WLockGuard lock_guard(adapter_map_rwlock_);
+      int overwrite = 0;
+      // should not fail in the following process
+      // NOTE(review): erase_complete_adapter() is the public variant and is called here
+      // with the write lock held; confirm it does not take adapter_map_rwlock_ itself.
+      if (OB_FAIL(set_complete_adapter_(new_adapter->get_inc_tablet_id(), new_adapter, overwrite))) {
+        LOG_WARN("failed to set new complete partial adapter", K(new_adapter->get_inc_tablet_id()), KR(ret));
+      } else if (OB_FAIL(set_complete_adapter_(new_adapter->get_vbitmap_tablet_id(), new_adapter, overwrite))) {
+        LOG_WARN("failed to set new complete partial adapter", K(new_adapter->get_vbitmap_tablet_id()), KR(ret));
+        // roll back; keep ret so the caller still sees the registration failure
+        if (OB_TMP_FAIL(erase_complete_adapter(new_adapter->get_inc_tablet_id()))) {
+          LOG_WARN("fail to release complete index adapter", K(new_adapter->get_inc_tablet_id()), K(tmp_ret));
+        } else {
+          new_adapter = nullptr;
+        }
+      } else if (OB_FAIL(set_complete_adapter_(new_adapter->get_snap_tablet_id(), new_adapter, overwrite))) {
+        LOG_WARN("failed to set new full partial adapter", K(new_adapter->get_snap_tablet_id()), KR(ret));
+        // roll back; keep ret so the caller still sees the registration failure
+        if (OB_TMP_FAIL(erase_complete_adapter(new_adapter->get_inc_tablet_id()))) {
+          LOG_WARN("fail to release complete index adapter", K(new_adapter->get_inc_tablet_id()), K(tmp_ret));
+        } else if (OB_TMP_FAIL(erase_complete_adapter(new_adapter->get_vbitmap_tablet_id()))) {
+          LOG_WARN("fail to release complete index adapter", K(new_adapter->get_vbitmap_tablet_id()), K(tmp_ret));
+        } else {
+          new_adapter = nullptr;
+        }
+      } else {
+        set_success = true;
+        if (OB_FAIL(erase_partial_adapter_(new_adapter->get_inc_tablet_id()))) {
+          LOG_WARN("fail to release partial index adapter", K(new_adapter->get_inc_tablet_id()), KR(ret));
+        } else if (OB_FAIL(erase_partial_adapter_(new_adapter->get_vbitmap_tablet_id()))) {
+          LOG_WARN("fail to release partial index adapter", K(new_adapter->get_vbitmap_tablet_id()), KR(ret));
+        } else if (OB_FAIL(erase_partial_adapter_(new_adapter->get_snap_tablet_id()))) {
+          LOG_WARN("fail to release partial index adapter", K(new_adapter->get_snap_tablet_id()), KR(ret));
+        }
+      }
+    }
+  }
+  if (OB_FAIL(ret) && OB_NOT_NULL(new_adapter) && set_success == false) {
+    new_adapter->~ObPluginVectorIndexAdaptor();
+    allocator.free(adpt_buff);
+    new_adapter = nullptr;
+    adpt_buff = nullptr;
+  }
+  return ret;
+}
+
+// for full partial
+// Merge the three partial adapters held by `candidate` into one newly allocated adapter
+// of type CreateTypeFullPartial, store it in the partial map under the three tablet ids
+// of `ctx` (overwriting existing entries), hand it to `adapter_guard`, and release the
+// adapters it replaced.
+int ObPluginVectorIndexMgr::replace_with_full_partial_adapter(ObVectorIndexAcquireCtx &ctx,
+                                                              ObIAllocator &allocator,
+                                                              ObPluginVectorIndexAdapterGuard &adapter_guard,
+                                                              ObString *vec_index_param,
+                                                              int64_t dim,
+                                                              ObVectorIndexAdapterCandiate *candidate)
+{
+  int ret = OB_SUCCESS;
+  ObPluginVectorIndexAdapterGuard &inc_adapter_guard = candidate->inc_adatper_guard_;
+  ObPluginVectorIndexAdapterGuard &bitmap_adapter_guard = candidate->bitmp_adatper_guard_;
+  ObPluginVectorIndexAdapterGuard &sn_adapter_guard = candidate->sn_adatper_guard_;
+  // create new adapter
+  ObPluginVectorIndexAdaptor *new_adapter = nullptr;
+  bool set_success = false;
+  void *adpt_buff = allocator.alloc(sizeof(ObPluginVectorIndexAdaptor));
+  if (OB_ISNULL(adpt_buff)) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("failed to allocate memory for vector index adapter", KR(ret));
+  } else {
+    new_adapter = new(adpt_buff)ObPluginVectorIndexAdaptor(&allocator, memory_context_);
+    new_adapter->set_create_type(CreateTypeFullPartial);
+    if (OB_FAIL(new_adapter->set_tablet_id(VIRT_INC, ctx.inc_tablet_id_))) {
+      LOG_WARN("failed to set inc tablet id", K(ctx), KR(ret));
+    } else if (OB_FAIL(new_adapter->set_tablet_id(VIRT_BITMAP, ctx.vbitmap_tablet_id_))) {
+      LOG_WARN("failed to set snapshot bitmap tablet id", K(ctx), KR(ret));
+    } else if (OB_FAIL(new_adapter->set_tablet_id(VIRT_SNAP, ctx.snapshot_tablet_id_))) {
+      LOG_WARN("failed to set snapshot index tablet id", K(ctx), KR(ret));
+    } else if (OB_FAIL(new_adapter->set_tablet_id(VIRT_DATA, ctx.data_tablet_id_))) {
+      LOG_WARN("failed to set data tablet id", K(ctx), KR(ret));
+    } else if (OB_FAIL(new_adapter->merge_parital_index_adapter(inc_adapter_guard.get_adatper()))) {
+      LOG_WARN("failed to merge inc index adapter",
+               K(ctx), KPC(inc_adapter_guard.get_adatper()), KR(ret));
+    } else if (OB_FAIL(new_adapter->merge_parital_index_adapter(bitmap_adapter_guard.get_adatper()))) {
+      LOG_WARN("failed to merge bitmap index adapter",
+               K(ctx), KPC(bitmap_adapter_guard.get_adatper()), KR(ret));
+    } else if (OB_FAIL(new_adapter->merge_parital_index_adapter(sn_adapter_guard.get_adatper()))) {
+      LOG_WARN("failed to merge snapshot index adapter",
+               K(ctx), KPC(sn_adapter_guard.get_adatper()), KR(ret));
+    // still call init to avoid not all 3 part of partial adapter called before merge
+    } else if ((OB_NOT_NULL(vec_index_param) && !vec_index_param->empty())
+               && OB_FAIL(new_adapter->init(*vec_index_param, dim, memory_context_, all_vsag_use_mem_))) {
+      LOG_WARN("failed to init adpt.", K(ret), K(*vec_index_param), K(dim));
+    } else {
+      WLockGuard lock_guard(adapter_map_rwlock_);
+      int overwrite = 1;
+      // should not fail in the following process
+      // NOTE(review): if a later set below fails, entries already stored keep pointing
+      // at new_adapter, which the cleanup at the end frees; a rollback may be needed.
+      if (OB_FAIL(set_partial_adapter_(ctx.inc_tablet_id_, new_adapter, overwrite))) {
+        LOG_WARN("failed to set new full partial adapter", K(ctx.inc_tablet_id_), KR(ret));
+      } else if (OB_FAIL(set_partial_adapter_(ctx.vbitmap_tablet_id_, new_adapter, overwrite))) {
+        LOG_WARN("failed to set new full partial adapter", K(ctx.vbitmap_tablet_id_), KR(ret));
+      } else if (OB_FAIL(set_partial_adapter_(ctx.snapshot_tablet_id_, new_adapter, overwrite))) {
+        LOG_WARN("failed to set new full partial adapter", K(ctx.snapshot_tablet_id_), KR(ret));
+      } else if (OB_FAIL(adapter_guard.set_adapter(new_adapter))) {
+        LOG_WARN("failed to set adapter", K(ctx), KR(ret));
+      } else {
+        // The maps and the caller's guard now reference new_adapter, so it must not be
+        // freed below even if releasing an old adapter fails. This used to declare a
+        // second, shadowing `set_success`, which left the outer flag false.
+        set_success = true;
+        // release because they are removed from hashmap
+        ObPluginVectorIndexAdaptor *inc_adapter = inc_adapter_guard.get_adatper();
+        ObPluginVectorIndexAdaptor *bitmap_adapter = bitmap_adapter_guard.get_adatper();
+        ObPluginVectorIndexAdaptor *sn_adapter = sn_adapter_guard.get_adatper();
+
+        if (OB_FAIL(ObPluginVectorIndexUtils::release_vector_index_adapter(inc_adapter))) {
+          LOG_WARN("fail to release vector index adapter",
+                   K(ctx.inc_tablet_id_), KPC(inc_adapter), KR(ret));
+        } else if (OB_FAIL(ObPluginVectorIndexUtils::release_vector_index_adapter(bitmap_adapter))) {
+          LOG_WARN("fail to release vector index adapter",
+                   K(ctx.vbitmap_tablet_id_), KPC(bitmap_adapter), KR(ret));
+        } else if (OB_FAIL(ObPluginVectorIndexUtils::release_vector_index_adapter(sn_adapter))) {
+          LOG_WARN("fail to release vector index adapter",
+                   K(ctx.snapshot_tablet_id_), KPC(sn_adapter), KR(ret));
+        }
+      }
+    }
+  }
+  if (OB_FAIL(ret) && set_success == false && OB_NOT_NULL(new_adapter)) {
+    new_adapter->~ObPluginVectorIndexAdaptor();
+    allocator.free(adpt_buff);
+    adpt_buff = nullptr;
+    new_adapter = nullptr;
+  }
+  return ret;
+
+}
+
+}
+}
\ No newline at end of file
diff --git a/src/share/vector_index/ob_plugin_vector_index_service.h b/src/share/vector_index/ob_plugin_vector_index_service.h
new file mode 100644
index 0000000000..466d5097fa
--- /dev/null
+++ b/src/share/vector_index/ob_plugin_vector_index_service.h
@@ -0,0 +1,327 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_OBSERVER_OB_PLUGIN_VECTOR_INDEX_SERVICE_DEFINE_H_
+#define OCEANBASE_OBSERVER_OB_PLUGIN_VECTOR_INDEX_SERVICE_DEFINE_H_
+#include "share/ob_ls_id.h"
+#include "share/scn.h"
+#include "lib/lock/ob_recursive_mutex.h"
+#include "share/rc/ob_tenant_base.h"
+#include "share/vector_index/ob_plugin_vector_index_adaptor.h"
+#include "share/vector_index/ob_plugin_vector_index_scheduler.h"
+#include "observer/table/ttl/ob_tenant_ttl_manager.h"
+#include "share/vector_index/ob_plugin_vector_index_util.h"
+
+
+namespace oceanbase
+{
+namespace share
+{
+
+// The four tablet ids that together identify one vector index when acquiring an adapter.
+struct ObVectorIndexAcquireCtx
+{
+  ObTabletID inc_tablet_id_;
+  ObTabletID vbitmap_tablet_id_;
+  ObTabletID snapshot_tablet_id_;
+  ObTabletID data_tablet_id_;
+
+  TO_STRING_KV(K_(inc_tablet_id), K_(vbitmap_tablet_id), K_(snapshot_tablet_id), K_(data_tablet_id));
+};
+
+// Holds guards on the inc / bitmap / snapshot partial adapters that are candidates for
+// being merged into a single adapter.
+class ObVectorIndexAdapterCandiate final
+{
+public:
+  ObVectorIndexAdapterCandiate()
+    : is_init_(true),
+      is_valid_(true),
+      inc_adatper_guard_(),
+      bitmp_adatper_guard_(),
+      sn_adatper_guard_()
+  {}
+
+  ~ObVectorIndexAdapterCandiate() {
+    is_init_ = false;
+    is_valid_ = false;
+  }
+
+  TO_STRING_KV(K_(is_init), K_(is_valid), K_(inc_adatper_guard), K_(bitmp_adatper_guard), K_(sn_adatper_guard));
+
+public:
+  bool is_init_;
+  bool is_valid_;
+  // true only when all three guards hold a valid adapter
+  bool is_complete()
+  {
+    return inc_adatper_guard_.is_valid() && bitmp_adatper_guard_.is_valid() && sn_adatper_guard_.is_valid();
+  }
+  ObPluginVectorIndexAdapterGuard inc_adatper_guard_;
+  ObPluginVectorIndexAdapterGuard bitmp_adatper_guard_;
+  ObPluginVectorIndexAdapterGuard sn_adatper_guard_;
+};
+
+// Manage all vector index adapter in a ls
+typedef common::hash::ObHashMap<ObTabletID, ObPluginVectorIndexAdaptor *> VectorIndexAdaptorMap;
+// NOTE(review): the template arguments of this map were lost in this copy of the patch
+// and cannot be derived from the code visible here; restore them from the original commit.
+typedef common::hash::ObHashMap VectorIndexMemSyncMap;
+class ObPluginVectorIndexMgr
+{
+public:
+  ObPluginVectorIndexMgr(lib::MemoryContext &memory_context)
+    : is_inited_(false),
+      need_check_(false),
+      ls_id_(),
+      complete_index_adpt_map_(),
+      partial_index_adpt_map_(),
+      adapter_map_rwlock_(),
+      ls_tablet_task_ctx_(),
+      tenant_id_(0),
+      interval_factor_(0),
+      vector_index_service_(nullptr),
+      local_schema_version_(OB_INVALID_VERSION), // was left uninitialized
+      processing_first_mem_sync_(true),
+      first_mem_sync_map_(),
+      second_mem_sync_map_(),
+      task_allocator_(ObMemAttr(MTL_ID(), "VecIdxTask")),
+      memory_context_(memory_context),
+      all_vsag_use_mem_(nullptr)
+  {}
+  virtual ~ObPluginVectorIndexMgr();
+
+  int init(uint64_t tenant_id, ObLSID ls_id, lib::MemoryContext &memory_context, uint64_t *all_vsag_use_mem);
+  ObLSID& get_ls_id() { return ls_id_; }
+  uint64_t get_tenant_id() { return tenant_id_; }
+  ObPluginVectorIndexLSTaskCtx& get_ls_task_ctx() { return ls_tablet_task_ctx_; }
+  VectorIndexAdaptorMap& get_partial_adapter_map() { return partial_index_adpt_map_; }
+  VectorIndexAdaptorMap& get_complete_adapter_map() { return complete_index_adpt_map_; }
+
+  // thread safe interface
+  void destroy();
+
+  void release_all_adapters();
+
+  int get_adapter_inst_guard(ObTabletID tablet_id, ObPluginVectorIndexAdapterGuard &adpt_guard);
+  int create_partial_adapter(ObTabletID idx_tablet_id,
+                             ObTabletID data_tablet_id,
+                             ObIndexType type,
+                             ObIAllocator &allocator,
+                             int64_t index_table_id,
+                             ObString *vec_index_param = nullptr,
+                             int64_t dim = 0);
+  int replace_with_complete_adapter(ObVectorIndexAdapterCandiate *candidate,
+                                    ObVecIdxSharedTableInfoMap &info_map,
+                                    ObIAllocator &allocator);
+  int replace_with_full_partial_adapter(ObVectorIndexAcquireCtx &ctx,
+                                        ObIAllocator &allocator,
+                                        ObPluginVectorIndexAdapterGuard &adapter_guard,
+                                        ObString *vec_index_param,
+                                        int64_t dim,
+                                        ObVectorIndexAdapterCandiate *candidate);
+
+  int get_or_create_partial_adapter(ObTabletID tablet_id,
+                                    ObIndexType type,
+                                    ObPluginVectorIndexAdapterGuard &adapter_guard,
+                                    ObString *vec_index_param,
+                                    int64_t dim);
+
+  int get_adapter_inst_by_ctx(ObVectorIndexAcquireCtx &ctx,
+                              bool &need_merge,
+                              ObIAllocator &allocator,
+                              ObPluginVectorIndexAdapterGuard &adapter_guard,
+                              ObVectorIndexAdapterCandiate &candidate,
+                              ObString *vec_index_param,
+                              int64_t dim);
+
+  int get_and_merge_adapter(ObVectorIndexAcquireCtx &ctx,
+                            ObIAllocator &allocator,
+                            ObPluginVectorIndexAdapterGuard &adapter_guard,
+                            ObString *vec_index_param,
+                            int64_t dim);
+  int check_and_merge_partial_inner(ObVecIdxSharedTableInfoMap &info_map, ObIAllocator &allocator);
+
+  // maintenance interface
+  int check_need_mem_data_sync_task(bool &need_sync);
+  int erase_complete_adapter(ObTabletID tablet_id);
+  int erase_partial_adapter(ObTabletID tablet_id);
+
+  // The two mem-sync maps swap roles: one is being processed while the other collects
+  // waiting entries; switch_processing_map() flips which is which.
+  VectorIndexMemSyncMap &get_processing_map() { return processing_first_mem_sync_ ? first_mem_sync_map_ : second_mem_sync_map_; }
+  VectorIndexMemSyncMap &get_waiting_map() { return processing_first_mem_sync_ ? second_mem_sync_map_ : first_mem_sync_map_; }
+  void switch_processing_map() { processing_first_mem_sync_ = !processing_first_mem_sync_; }
+
+  ObIAllocator &get_task_allocator() { return task_allocator_; }
+
+  // debug interface
+  void dump_all_inst();
+  // for virtual table
+  int get_snapshot_tablet_ids(ObIArray<ObLSTabletPair> &complete_tablet_ids, ObIArray<ObLSTabletPair> &partial_tablet_ids);
+
+  TO_STRING_KV(K_(is_inited), K_(need_check), K_(ls_id), K_(ls_tablet_task_ctx));
+private:
+  // non-thread safe inner functions
+  int get_adapter_inst_(ObTabletID tablet_id, ObPluginVectorIndexAdaptor *&index_inst);
+  int set_complete_adapter_(ObTabletID tablet_id, ObPluginVectorIndexAdaptor *adapter_inst, int overwrite = 0);
+
+  int set_partial_adapter_(ObTabletID tablet_id, ObPluginVectorIndexAdaptor *adapter_inst, int overwrite = 0);
+  int erase_partial_adapter_(ObTabletID tablet_id);
+  // thread safe inner functions
+  int get_or_create_partial_adapter_(ObTabletID tablet_id,
+                                     ObIndexType type,
+                                     ObPluginVectorIndexAdapterGuard &adapter_guard,
+                                     ObString *vec_index_param,
+                                     int64_t dim,
+                                     ObIAllocator &allocator);
+
+private:
+  static const int64_t DEFAULT_ADAPTER_HASH_SIZE = 1000;
+  static const int64_t DEFAULT_CANDIDATE_ADAPTER_HASH_SIZE = 1000;
+
+  typedef common::RWLock RWLock;
+  typedef RWLock::RLockGuard RLockGuard;
+  typedef RWLock::WLockGuard WLockGuard;
+
+  bool is_inited_;
+  bool need_check_; // schema version change, or ls/tablet not existed
+  share::ObLSID ls_id_;
+  VectorIndexAdaptorMap complete_index_adpt_map_; // map of complete index adapters with full info
+  VectorIndexAdaptorMap partial_index_adpt_map_; // map of passive created index adapters
+  RWLock adapter_map_rwlock_; // lock for adapter maps
+  ObPluginVectorIndexLSTaskCtx ls_tablet_task_ctx_; // task ctx of ls level
+  uint64_t tenant_id_;
+  uint32_t interval_factor_; // used to expand real execute interval
+  ObPluginVectorIndexService *vector_index_service_;
+  int64_t local_schema_version_; // detect schema change
+
+
+  // pingpong map for follower receive memdata sync task from log
+  bool processing_first_mem_sync_;
+  VectorIndexMemSyncMap first_mem_sync_map_;
+  VectorIndexMemSyncMap second_mem_sync_map_;
+  ObArenaAllocator task_allocator_;
+  lib::MemoryContext &memory_context_;
+  uint64_t *all_vsag_use_mem_;
+};
+
+// id to unique identify an vector index adapter
+struct ObPluginVectorIndexIdentity
+{
+  ObPluginVectorIndexIdentity() : data_tablet_id_(), index_identity_() {};
+  ObPluginVectorIndexIdentity(ObTabletID data_tablet_id, common::ObString index_identity)
+    : data_tablet_id_(data_tablet_id), index_identity_(index_identity)
+  {}
+  ~ObPluginVectorIndexIdentity()
+  {
+    data_tablet_id_.reset();
+    index_identity_.reset();
+  }
+  bool is_valid() const { return data_tablet_id_.is_valid() && !index_identity_.empty(); }
+  // Sum of the two member hashes, accumulated as unsigned so that wrap-around is well
+  // defined (the previous int64_t accumulator could overflow).
+  uint64_t hash() const
+  {
+    uint64_t hash_value = data_tablet_id_.hash();
+    hash_value += index_identity_.hash();
+    return hash_value;
+  }
+  inline int hash(uint64_t &hash_val) const
+  {
+    hash_val = hash();
+    return OB_SUCCESS;
+  }
+  bool operator==(const ObPluginVectorIndexIdentity &other) const
+  {
+    return data_tablet_id_ == other.data_tablet_id_ && index_identity_ == other.index_identity_;
+  }
+  TO_STRING_KV(K_(data_tablet_id), K_(index_identity));
+
+  ObTabletID data_tablet_id_;
+  ObString index_identity_; // index_name_prefix
+};
+
+typedef common::hash::ObHashMap<ObLSID, ObPluginVectorIndexMgr *> LSIndexMgrMap;
+// Manage all vector index adapters of a tenant
+class ObPluginVectorIndexService
+{
+public:
+  ObPluginVectorIndexService()
+    : is_inited_(false),
+      has_start_(false),
+      tenant_id_(OB_INVALID_TENANT_ID),
+      is_ls_or_tablet_changed_(false),
+      schema_service_(NULL),
+      ls_service_(NULL),
+      memory_context_(NULL),
+      all_vsag_use_mem_(0),
+      stop_flag_(false) // was left uninitialized
+
+  {}
+  virtual ~ObPluginVectorIndexService();
+  int init(const uint64_t tenant_id,
+           schema::ObMultiVersionSchemaService *schema_service,
+           ObLSService *ls_service);
+  bool is_inited() { return is_inited_; }
+  // mtl interfaces
+  static int mtl_init(ObPluginVectorIndexService *&service);
+  int start();
+  void stop();
+  void wait();
+  void destroy();
+
+  // feature interfaces
+  LSIndexMgrMap &get_ls_index_mgr_map() { return index_ls_mgr_map_; };
+  int get_adapter_inst_guard(ObLSID ls_id, ObTabletID tablet_id, ObPluginVectorIndexAdapterGuard &adapter_guard);
+  int create_partial_adapter(ObLSID ls_id,
+                             ObTabletID idx_tablet_id,
+                             ObTabletID data_tablet_id,
+                             ObIndexType type,
+                             int64_t index_table_id,
+                             ObString *vec_index_param = nullptr,
+                             int64_t dim = 0);
+  int check_and_merge_adapter(ObLSID ls_id, ObVecIdxSharedTableInfoMap &info_map);
+  int acquire_vector_index_mgr(ObLSID ls_id, ObPluginVectorIndexMgr *&mgr);
+
+  // user interfaces
+  int acquire_adapter_guard(ObLSID ls_id,
+                            ObTabletID tablet_id,
+                            ObIndexType type,
+                            ObPluginVectorIndexAdapterGuard &adapter_guard,
+                            ObString *vec_index_param = nullptr,
+                            int64_t dim = 0);
+  int acquire_adapter_guard(ObLSID ls_id,
+                            ObVectorIndexAcquireCtx &ctx,
+                            ObPluginVectorIndexAdapterGuard &adapter_guard,
+                            ObString *vec_index_param = nullptr,
+                            int64_t dim = 0);
+
+  // debug interface
+  int dump_all_inst();
+  // for virtual table
+  int get_snapshot_ids(ObIArray<ObLSTabletPair> &complete_tablet_ids, ObIArray<ObLSTabletPair> &partial_tablet_ids);
+
+  TO_STRING_KV(K_(is_inited), K_(has_start), K_(tenant_id),
+               K_(is_ls_or_tablet_changed), KP_(schema_service), KP_(ls_service));
+private:
+  static const int64_t BASIC_TIMER_INTERVAL = 30 * 1000 * 1000; // 30s
+  static const int64_t VEC_INDEX_LOAD_TIME_TASKER_THRESHOLD = 30 * 1000 * 1000; // 30s
+  static const int64_t DEFAULT_LS_HASH_SIZE = 64;
+  bool is_inited_;
+  bool has_start_;
+  int64_t tenant_id_;
+  LSIndexMgrMap index_ls_mgr_map_;
+  bool is_ls_or_tablet_changed_;
+
+  share::schema::ObMultiVersionSchemaService *schema_service_;
+  storage::ObLSService *ls_service_;
+  ObFIFOAllocator allocator_;
+  common::ObArenaAllocator alloc_;
+  lib::MemoryContext memory_context_;
+  uint64_t all_vsag_use_mem_;
+
+public:
+  volatile bool stop_flag_;
+};
+
+} // namespace share
+} // namespace oceanbase
+#endif
\ No newline at end of file
diff --git a/src/share/vector_index/ob_plugin_vector_index_util.cpp b/src/share/vector_index/ob_plugin_vector_index_util.cpp
new file mode 100644
index 0000000000..b9018ae15d
--- /dev/null
+++ b/src/share/vector_index/ob_plugin_vector_index_util.cpp
@@ -0,0 +1,244 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ * This file is for define of plugin vector index util
+ */
+
+#define USING_LOG_PREFIX SHARE
+
+#include "ob_plugin_vector_index_util.h"
+#include "share/rc/ob_tenant_base.h"
+#include "storage/tx_storage/ob_tenant_freezer.h"
+
+namespace oceanbase
+{
+namespace share
+{
+
+// Allocate the single row / cell that get_next_row() reuses, from the allocator given
+// at construction. Fails with OB_INVALID_ARGUMENT when no allocator was supplied (the
+// default constructor leaves it null).
+int ObVectorQueryVidIterator::init()
+{
+  INIT_SUCC(ret);
+  if (OB_ISNULL(allocator_)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("allocator is null.", K(ret));
+  } else if (OB_ISNULL(row_ = OB_NEWx(ObNewRow, allocator_))) {
+    // constructed with OB_NEWx, same as init(total, vids, allocator)
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("failed to allocator NewRow.", K(ret));
+  } else if (OB_ISNULL(obj_ = OB_NEWx(ObObj, allocator_))) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("failed to allocator NewRow.", K(ret));
+  } else {
+    is_init_ = true;
+  }
+  return ret;
+}
+
+// Bind the iterator to `total` ids stored in `vids` and allocate the reusable row / cell
+// from `allocator`. `vids` may be null only when `total` is 0.
+int ObVectorQueryVidIterator::init(int64_t total, int64_t *vids, ObIAllocator *allocator)
+{
+  INIT_SUCC(ret);
+  if ((OB_ISNULL(vids) && total != 0) || OB_ISNULL(allocator)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("get vids or allocator", K(ret), K(vids), K(allocator));
+  } else if (OB_ISNULL(row_ = OB_NEWx(ObNewRow, allocator))) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("failed to allocator NewRow.", K(ret));
+  } else if (OB_ISNULL(obj_ = OB_NEWx(ObObj, allocator))) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("failed to allocator NewRow.", K(ret));
+  } else {
+    is_init_ = true;
+    total_ = total;
+    cur_pos_ = 0;
+    vids_ = vids;
+    allocator_ = allocator;
+  }
+  return ret;
+}
+
+// Return the next id as a one-cell row. The row and cell are reused across calls.
+// Returns OB_ITER_END once all ids have been handed out.
+int ObVectorQueryVidIterator::get_next_row(ObNewRow *&row)
+{
+  INIT_SUCC(ret);
+  if (!is_init_) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("iter is not initialized.", K(ret));
+  } else if (cur_pos_ < total_) {
+    obj_->reset();
+    row_->reset();
+
+    obj_->set_int(vids_[cur_pos_++]);
+    row_->cells_ = obj_;
+    row_->count_ = 1;
+    row_->projector_ = NULL;
+    row_->projector_size_ = 0;
+
+    row = row_;
+  } else {
+    ret = OB_ITER_END;
+  }
+  return ret;
+}
+
+// Return up to batch_size_ ids in one freshly allocated row whose cells are the ids;
+// `size` is the number of cells filled. Returns OB_ITER_END once all ids have been
+// handed out.
+int ObVectorQueryVidIterator::get_next_rows(ObNewRow *&row, int64_t &size)
+{
+  INIT_SUCC(ret);
+  if (!is_init_) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("iter is not initialized.", K(ret));
+  } else if (cur_pos_ < total_) {
+    size = 0;
+    row = nullptr;
+    ObObj *obj = nullptr;
+    if (batch_size_ <= 0) {
+      // Previously this returned OB_SUCCESS with a null row and without advancing,
+      // so a caller looping until OB_ITER_END could never finish.
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("batch size is not set.", K(ret), K(batch_size_));
+    } else if (OB_ISNULL(row = static_cast<ObNewRow *>(allocator_->alloc(sizeof(ObNewRow))))) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("failed to allocator NewRow.", K(ret));
+    } else if (OB_ISNULL(obj = static_cast<ObObj *>(allocator_->alloc(sizeof(ObObj) * batch_size_)))) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("failed to allocator NewRow.", K(ret));
+    } else {
+      int64_t index = 0;
+      for (; index < batch_size_ && cur_pos_ < total_; ++index) {
+        obj[index].set_int(vids_[cur_pos_++]);
+      }
+      row->cells_ = obj;
+      row->count_ = index;
+      row->projector_ = NULL;
+      row->projector_size_ = 0;
+      size = index;
+    }
+  } else {
+    ret = OB_ITER_END;
+  }
+
+  return ret;
+}
+
+// Return to the uninitialized state and reset the allocator, which invalidates every
+// row previously handed out.
+void ObVectorQueryVidIterator::reset()
+{
+  is_init_ = false;
+  total_ = 0;
+  cur_pos_ = 0;
+  // do not keep pointers around once the state they belong to is gone
+  vids_ = nullptr;
+  row_ = nullptr;
+  obj_ = nullptr;
+  if (OB_NOT_NULL(allocator_)) {
+    allocator_->reset();
+  }
+}
+
+// Merge two id lists into `vids_result`, keeping at most `total` ids. When both inputs
+// are non-empty the entry with the smaller distance is taken first (ties take `first`);
+// when one input is empty the other is copied as is. `actual_cnt` is the number of ids
+// written. `vids_result` must have room for `total` entries.
+int ObPluginVectorIndexHelper::merge_delta_and_snap_vids(const ObVsagQueryResult &first,
+                                                         const ObVsagQueryResult &second,
+                                                         const int64_t total,
+                                                         int64_t &actual_cnt,
+                                                         int64_t *&vids_result)
+{
+  INIT_SUCC(ret);
+  actual_cnt = 0;
+  int64_t res_num = 0;
+  if (first.total_ == 0 || second.total_ == 0) {
+    // at most one side carries data, copy it through unchanged
+    const ObVsagQueryResult &src = (first.total_ == 0) ? second : first;
+    if (total > 0 && src.total_ > 0 && (OB_ISNULL(src.vids_) || OB_ISNULL(vids_result))) {
+      ret = OB_INVALID_ARGUMENT;
+      LOG_WARN("get vids invalid.", K(ret), K(src.vids_), K(vids_result));
+    } else {
+      while (res_num < total && res_num < src.total_) {
+        vids_result[res_num] = src.vids_[res_num];
+        res_num++;
+      }
+      actual_cnt = res_num;
+    }
+  } else if (OB_ISNULL(first.vids_) || OB_ISNULL(second.vids_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get vids invalid.", K(ret), K(first.vids_), K(second.vids_));
+  } else if (total > 0
+             && (OB_ISNULL(first.distances_) || OB_ISNULL(second.distances_) || OB_ISNULL(vids_result))) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("get distances or result buffer invalid.", K(ret),
+             K(first.distances_), K(second.distances_), K(vids_result));
+  } else {
+    int64_t i = 0, j = 0;
+    while (res_num < total && i < first.total_ && j < second.total_) {
+      if (first.distances_[i] <= second.distances_[j]) {
+        vids_result[res_num++] = first.vids_[i++];
+      } else {
+        vids_result[res_num++] = second.vids_[j++];
+      }
+    }
+
+    while (res_num < total && i < first.total_) {
+      vids_result[res_num++] = first.vids_[i++];
+    }
+
+    while (res_num < total && j < second.total_) {
+      vids_result[res_num++] = second.vids_[j++];
+    }
+
+    actual_cnt = res_num;
+  }
+
+  return ret;
+}
+
+// Read the tenant's ob_vector_memory_limit_percentage into `value` and compute
+// `upper_limit` as 100 - 15 - <memstore limit percentage>, floored at 0. Both outputs
+// are 0 and OB_INVALID_CONFIG is returned when the tenant config is invalid or the
+// tenant switch failed.
+int ObPluginVectorIndexHelper::get_vector_memory_value_and_limit(const uint64_t tenant_id,int64_t& value, int64_t& upper_limit)
+{
+  int ret = OB_SUCCESS;
+  omt::ObTenantConfigGuard tenant_config(TENANT_CONF(tenant_id));
+  int64_t extra_mem_percent = 15;
+  int64_t cur_memstore_limit_percent = 0;
+  MTL_SWITCH(tenant_id) {
+    cur_memstore_limit_percent = MTL(ObTenantFreezer*)->get_memstore_limit_percentage();
+  }
+  if (tenant_config.is_valid() && OB_SUCC(ret)) {
+    upper_limit = (100 - extra_mem_percent - cur_memstore_limit_percent);
+    if (upper_limit < 0) {
+      upper_limit = 0;
+    }
+    value = tenant_config->ob_vector_memory_limit_percentage;
+    LOG_TRACE("check is_ob_vector_memory_valid", K(value), K(extra_mem_percent), K(cur_memstore_limit_percent), K(upper_limit));
+  } else {
+    upper_limit = 0;
+    value = 0;
+    ret = OB_INVALID_CONFIG;
+    LOG_ERROR("tenant config is invalid",K(ret), K(tenant_id));
+  }
+  return ret;
+}
+
+// `is_valid` is true when the configured percentage lies strictly between 0 and the
+// upper limit.
+int ObPluginVectorIndexHelper::is_ob_vector_memory_valid(const uint64_t tenant_id, bool& is_valid)
+{
+  int ret = OB_SUCCESS;
+  is_valid = false;
+  int64_t value = 0;
+  int64_t upper_limit = 0;
+  if (OB_FAIL(get_vector_memory_value_and_limit(tenant_id, value, upper_limit))) {
+    LOG_WARN("fail to get vector memory value and limit", K(ret));
+  } else if (0 < value && value < upper_limit) {
+    is_valid = true;
+  }
+  return ret;
+}
+
+// Compute the byte budget for vector indexes: tenant unit memory size multiplied by the
+// configured percentage, or 0 when the percentage is outside (0, upper_limit).
+// A failure to fetch the tenant unit is only logged (best effort, as before); in that
+// case `memory_limit` is now set to 0 instead of being left untouched.
+int ObPluginVectorIndexHelper::get_vector_memory_limit_size(const uint64_t tenant_id, int64_t& memory_limit)
+{
+  // was `bool ret`, which collapsed every error code to 1 before it reached the caller
+  int ret = OB_SUCCESS;
+  ObUnitInfoGetter::ObTenantConfig unit;
+  int tmp_ret = OB_SUCCESS;
+  if (OB_TMP_FAIL(GCTX.omt_->get_tenant_unit(tenant_id, unit))) {
+    memory_limit = 0;
+    LOG_WARN("get tenant unit failed", K(tmp_ret), K(tenant_id));
+  } else {
+    const int64_t memory_size = unit.config_.memory_size();
+    int64_t ob_vector_memory_limit_percentage = 0;
+    int64_t upper_limit = 0;
+    if (OB_FAIL(get_vector_memory_value_and_limit(tenant_id, ob_vector_memory_limit_percentage, upper_limit))) {
+      LOG_WARN("fail to get vector memory value and limit", K(ret));
+    } else if (0 < ob_vector_memory_limit_percentage && ob_vector_memory_limit_percentage < upper_limit) {
+      memory_limit = memory_size * ob_vector_memory_limit_percentage / 100;
+      LOG_TRACE("vector index memory limit debug", K(memory_size), K(ob_vector_memory_limit_percentage),K(memory_limit));
+    } else {
+      memory_limit = 0;
+      LOG_TRACE("vector index memory is not enough,check memstore config", K(memory_size), K(ob_vector_memory_limit_percentage),K(memory_limit));
+    }
+  }
+  return ret;
+}
+
+
+};
+};
\ No newline at end of file
diff --git a/src/share/vector_index/ob_plugin_vector_index_util.h b/src/share/vector_index/ob_plugin_vector_index_util.h
new file mode 100644
index 0000000000..04786f2876
--- /dev/null
+++ b/src/share/vector_index/ob_plugin_vector_index_util.h
@@ -0,0 +1,99 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ * This file is for define of plugin vector index util + */ + +#ifndef OCEANBASE_SHARE_PLUGIN_VECTOR_INDEX_UTIL_H_ +#define OCEANBASE_SHARE_PLUGIN_VECTOR_INDEX_UTIL_H_ + +#include "common/object/ob_obj_type.h" +#include "common/row/ob_row_iterator.h" +#include "lib/vector/ob_vector_util.h" + +namespace oceanbase +{ +namespace share +{ + +class ObVectorQueryVidIterator : public common::ObNewRowIterator +{ +public: + ObVectorQueryVidIterator(int64_t total, int64_t *vid, ObIAllocator *allocator) + : is_init_(false), + total_(total), + cur_pos_(0), + batch_size_(0), + vids_(vid), + row_(nullptr), + obj_(nullptr), + allocator_(allocator) {}; + + ObVectorQueryVidIterator() + : is_init_(false), + total_(0), + cur_pos_(0), + batch_size_(0), + vids_(nullptr), + row_(nullptr), + obj_(nullptr), + allocator_(nullptr) {}; + virtual ~ObVectorQueryVidIterator() {}; + int init(); + int init(int64_t total, int64_t *vids, ObIAllocator *allocator); + void set_batch_size(int64_t batch_size) { batch_size_ = batch_size; } + + virtual int get_next_row(ObNewRow *&row) override; + virtual int get_next_rows(ObNewRow *&row, int64_t &size) override; + virtual int get_next_row() override { return OB_NOT_IMPLEMENT; } + virtual void reset() override; + +private: + bool is_init_; + int64_t total_; + int64_t cur_pos_; + int64_t batch_size_; + int64_t *vids_; + ObNewRow *row_; + ObObj *obj_; + ObIAllocator *allocator_; +}; + +struct ObVsagQueryResult +{ + int64_t total_; + const int64_t *vids_; + const float *distances_; +}; + +class ObPluginVectorIndexHelper final +{ +public: + static int merge_delta_and_snap_vids(const ObVsagQueryResult &first, + const ObVsagQueryResult &second, + const int64_t total, + int64_t &actual_cnt, + int64_t *&vids_result); + + static int get_vector_memory_value_and_limit(const uint64_t tenant_id, + int64_t& value, + int64_t& upper_limit); + + static int is_ob_vector_memory_valid(const uint64_t tenant_id, + bool& is_valid); + + static int 
get_vector_memory_limit_size(const uint64_t tenant_id, + int64_t& memory_limit); +}; + +}; +}; + +#endif \ No newline at end of file diff --git a/src/share/vector_index/ob_plugin_vector_index_utils.cpp b/src/share/vector_index/ob_plugin_vector_index_utils.cpp new file mode 100644 index 0000000000..3700a8b042 --- /dev/null +++ b/src/share/vector_index/ob_plugin_vector_index_utils.cpp @@ -0,0 +1,1217 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ +#define USING_LOG_PREFIX SERVER +#include "share/ob_errno.h" +#include "lib/oblog/ob_log_module.h" +#include "share/vector_index/ob_plugin_vector_index_utils.h" +#include "share/vector_index/ob_plugin_vector_index_adaptor.h" +#include "share/vector_index/ob_plugin_vector_index_scheduler.h" +#include "src/share/vector_index/ob_plugin_vector_index_serialize.h" +#include "share/ob_vec_index_builder_util.h" +#include "storage/ls/ob_ls.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "storage/access/ob_table_access_param.h" +#include "storage/access/ob_dml_param.h" +#include "storage/tx_storage/ob_access_service.h" +#include "storage/access/ob_table_scan_iterator.h" +#include "lib/vector/ob_vector_util.h" +#include "common/rowkey/ob_rowkey.h" +#include "src/share/schema/ob_tenant_schema_service.h" + +namespace oceanbase +{ +namespace share +{ + +int ObPluginVectorIndexUtils::get_task_read_snapshot(ObLSID &ls_id, SCN &read_version) +{ + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + // 
ObLSWRSHandler::get_ls_weak_read_ts + storage::ObLSService *ls_svr = MTL(storage::ObLSService*); + if (OB_FAIL(ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::SHARE_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(ls_id)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null ls", K(ret), K(ls_id)); + } else { + read_version = ls->get_ls_wrs_handler()->get_ls_weak_read_ts(); + } + + return ret; +} + +int ObPluginVectorIndexUtils::add_key_ranges(uint64_t table_id, ObRowkey& rowkey, storage::ObTableScanParam &scan_param) +{ + INIT_SUCC(ret); + ObNewRange new_range; + if (OB_FAIL(new_range.build_range(table_id, rowkey))) { + LOG_WARN("failed to build range.", K(ret), K(table_id), K(rowkey)); + } else if (FALSE_IT(scan_param.key_ranges_.reset())) { + } else if (OB_FAIL(scan_param.key_ranges_.push_back(new_range))) { + LOG_WARN("failed to build key ranges.", K(ret), K(table_id), K(rowkey)); + } + + return ret; +} + +int ObPluginVectorIndexUtils::iter_table_rescan(storage::ObTableScanParam &scan_param, common::ObNewRowIterator *iter) +{ + INIT_SUCC(ret); + ObAccessService *tsc_service = MTL(ObAccessService *); + + if (OB_FAIL(tsc_service->reuse_scan_iter(false, iter))) { + LOG_WARN("failed to reuse scan iter.", K(ret)); + } else if (OB_FAIL(tsc_service->table_rescan(scan_param, iter))) { + LOG_WARN("failed to rescan iter.", K(ret)); + } + + return ret; +} + +int ObPluginVectorIndexUtils::read_object_from_data_table_iter(ObObj *&input_obj, + int32_t data_table_rowkey_count, + uint64_t table_id, + storage::ObTableScanParam &scan_param, + common::ObNewRowIterator *iter, + schema::ObIndexType type, + ObIAllocator &allocator, + ObObj &output_obj, + bool &get_data) +{ + INIT_SUCC(ret); + ObRowkey rowkey(input_obj, data_table_rowkey_count); + + ObString vector; + if (OB_FAIL(add_key_ranges(table_id, rowkey, scan_param))) { + LOG_WARN("failed to set vid id key", K(ret)); + } else if 
(OB_FAIL(iter_table_rescan(scan_param, iter))) { + LOG_WARN("failed to recan vid id scan param.", K(ret)); + } else { + blocksstable::ObDatumRow *datum_row = nullptr; + storage::ObTableScanIterator *scan_iter = dynamic_cast(iter); + + if (OB_ISNULL(scan_iter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to cast to vid iter.", K(ret)); + } else if (OB_FAIL(scan_iter->get_next_row(datum_row))) { + if (OB_ITER_END != ret) { + LOG_WARN("failed to get next row from next table.", K(ret)); + } else { + output_obj.reset(); + ret = OB_SUCCESS; + } + } else { + if (datum_row->get_column_count() != 1) { // only vector columsn + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get row column cnt invalid.", K(ret), K(datum_row->get_column_count())); + } else { + char *copy_str = nullptr; + ObString vector = datum_row->storage_datums_[0].get_string(); + int64_t size = vector.length(); + if (OB_ISNULL(copy_str = static_cast(allocator.alloc(sizeof(char*) * size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocator.", K(ret)); + } else { + memcpy(copy_str, vector.ptr(), size); + output_obj.reset(); + output_obj.set_string(ObVarcharType, copy_str, size); + get_data = true; + } + } + } + } + + return ret; +} + +int ObPluginVectorIndexUtils::read_object_from_vid_rowkey_table_iter(ObObj *input_obj, + uint64_t table_id, + storage::ObTableScanParam &scan_param, + common::ObNewRowIterator *iter, + schema::ObIndexType type, + ObIAllocator &allocator, + ObObj *&output_obj, + int32_t data_table_rowkey_count) +{ + INIT_SUCC(ret); + ObRowkey rowkey(input_obj, 1); // vid_rowkey table only has one rowkey column + + if (OB_FAIL(add_key_ranges(table_id, rowkey, scan_param))) { + LOG_WARN("failed to set vid id key", K(ret)); + } else if (OB_FAIL(iter_table_rescan(scan_param, iter))) { + LOG_WARN("failed to recan vid id scan param.", K(ret)); + } else { + blocksstable::ObDatumRow *datum_row = nullptr; + storage::ObTableScanIterator *scan_iter = dynamic_cast(iter); + + if 
(OB_ISNULL(scan_iter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to cast to vid iter.", K(ret)); + } else if (OB_FAIL(scan_iter->get_next_row(datum_row))) { + if (OB_ITER_END != ret) { + LOG_WARN("failed to get next row from next table.", K(ret)); + } else { + ret = OB_SUCCESS; + } + } else { + const ObIArray *out_col_param + = scan_param.table_param_->get_read_info().get_columns(); + + if (datum_row->get_column_count() != data_table_rowkey_count + 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get row column cnt invalid.", K(ret), K(datum_row->get_column_count())); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < data_table_rowkey_count; ++i) { + ObObj tmp_obj; + output_obj[i].reset(); + ObObjMeta meta_type = out_col_param->at(i + 1)->get_meta_type(); + if (OB_FAIL(datum_row->storage_datums_[i + 1].to_obj(tmp_obj, meta_type))) { + LOG_WARN("failed to convert datum to obj.", K(ret), K(i), K(datum_row->storage_datums_[i + 1])); + } else if (OB_FAIL(ob_write_obj(allocator, tmp_obj, output_obj[i]))) { + LOG_WARN("failed to write obj.", K(ret), K(i), K(tmp_obj)); + } + } + } + } + } + + return ret; +} + +int ObPluginVectorIndexUtils::get_vec_column_id( + ObSEArray &vector_column_ids, + uint64_t incr_index_table_id, + uint64_t data_table_id) +{ + INIT_SUCC(ret); + ObSchemaGetterGuard schema_guard; + const ObTableSchema *delta_buffer_schema = nullptr; + const ObTableSchema *table_schema = nullptr; + uint64_t tenant_id = MTL_ID(); + + ObMultiVersionSchemaService *schema_service = MTL(schema::ObTenantSchemaService*)->get_schema_service(); + if (OB_ISNULL(schema_service)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret), KP(schema_service)); + } else if (OB_FAIL(schema_service->get_tenant_schema_guard(tenant_id, schema_guard))) { + LOG_WARN("failed to get schema manager", K(ret), K(tenant_id)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, incr_index_table_id, delta_buffer_schema))) { + LOG_WARN("failed to get table schema by index 
id.", K(ret), K(tenant_id), K(incr_index_table_id)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, data_table_id, table_schema))) { + LOG_WARN("failed to get data table scheam.", K(ret), K(data_table_id)); + } else if (OB_ISNULL(delta_buffer_schema) || OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid index table schema.", K(ret), KP(delta_buffer_schema), KP(table_schema)); + } else if (OB_FAIL(ObVectorIndexUtil::get_vector_index_column_id(*table_schema, *delta_buffer_schema, vector_column_ids))) { + LOG_WARN("failed to get vector index column id.", K(ret)); + } else if (vector_column_ids.count() != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get vector column id count invalid.", K(ret), K(vector_column_ids.count())); + } + + return ret; +} + + +int ObPluginVectorIndexUtils::read_vector_info(ObPluginVectorIndexAdaptor *adapter, + ObIAllocator &allocator, + ObLSID &ls_id, + SCN target_scn, + ObVectorQueryAdaptorResultContext &ada_ctx) +{ + INIT_SUCC(ret); + uint64_t vid_id_table_table_id = adapter->get_vid_rowkey_table_id(); + uint64_t data_table_table_id = adapter->get_data_table_id(); + schema::ObTableParam vid_table_param(allocator); + schema::ObTableParam data_table_param(allocator); + common::ObNewRowIterator *vid_id_iter = nullptr; + common::ObNewRowIterator *data_iter = nullptr; + schema::ObIndexType type = INDEX_TYPE_VEC_VID_ROWKEY_LOCAL; + ObObj *output_obj = nullptr; + ObAccessService *tsc_service = MTL(ObAccessService *); + + SMART_VARS_2((storage::ObTableScanParam, vid_id_scan_param), + (storage::ObTableScanParam, data_scan_param)) { + if (ada_ctx.get_count() == 0) { + // do noting + } else if (OB_ISNULL(output_obj = static_cast(allocator.alloc(sizeof(ObObj) * ada_ctx.get_count())))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc mem.", K(ret)); + } else if (OB_FAIL(read_local_tablet(ls_id, + adapter, + target_scn, + type, + allocator, + vid_id_scan_param, + vid_table_param, + vid_id_iter))) { 
+ LOG_WARN("failed to read vid id table local tablet.", K(ret)); + } else if (OB_FAIL(read_local_tablet(ls_id, + adapter, + target_scn, + INDEX_TYPE_IS_NOT, + allocator, + data_scan_param, + data_table_param, + data_iter))) { + LOG_WARN("failed to read data table local tablet.", K(ret)); + } else { + bool get_data = false; + void *buf = nullptr; + ObObj *obj_ptr = nullptr; + int32_t data_table_rowkey_count = vid_table_param.get_output_projector().count() - 1; + LOG_INFO("data_table_rowkey_count", K(data_table_rowkey_count)); + if (data_table_rowkey_count <= 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get data table rowkey count invalid.", K(ret), K(data_table_rowkey_count)); + } else { + if (OB_ISNULL(buf = allocator.alloc(sizeof(ObObj) * data_table_rowkey_count))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc mem.", K(ret), K(data_table_rowkey_count)); + } else { + obj_ptr = new (buf) ObObj[data_table_rowkey_count]; + } + } + + for (int64_t i = 0; OB_SUCC(ret) && i < ada_ctx.get_count(); i++) { + if (OB_FAIL(read_object_from_vid_rowkey_table_iter(&(ada_ctx.get_vids()[i]), + vid_id_table_table_id, + vid_id_scan_param, + vid_id_iter, + type, + allocator, + obj_ptr, + data_table_rowkey_count))) { + LOG_WARN("failed to read obj from 2nd table.", K(ret)); + } else if (OB_FAIL(read_object_from_data_table_iter(obj_ptr, + data_table_rowkey_count, + data_table_table_id, + data_scan_param, + data_iter, + INDEX_TYPE_IS_NOT, + allocator, + output_obj[i], + get_data))) { + LOG_WARN("failed to read obj from data table.", K(ret)); + } else { + vid_id_scan_param.key_ranges_.pop_back(); + data_scan_param.key_ranges_.pop_back(); + } + } + + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + + if (OB_SUCC(ret)) { + if (get_data) { + ada_ctx.set_vectors(output_obj); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("not get anydata", K(ret)); + } + } + } + } + + if (OB_NOT_NULL(tsc_service)) { + int tmp_ret = OB_SUCCESS; + if (OB_NOT_NULL(vid_id_iter)) { + tmp_ret 
= tsc_service->revert_scan_iter(vid_id_iter); + if (tmp_ret != OB_SUCCESS) { + LOG_WARN("revert vid_id_iter failed", K(ret)); + } + } + vid_id_iter = nullptr; + if (OB_NOT_NULL(data_iter)) { + tmp_ret = tsc_service->revert_scan_iter(data_iter); + if (tmp_ret != OB_SUCCESS) { + LOG_WARN("revert data_iter failed", K(ret)); + } + } + vid_id_iter = nullptr; + } + + return ret; +} +// debug interface, remove later +int ObPluginVectorIndexUtils::test_read_local_data(ObLSID &ls_id, + ObPluginVectorIndexAdaptor *adapter, + ObIndexType index_type, + SCN target_scn, + ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + storage::ObTableScanParam scan_param; + schema::ObTableParam table_param(allocator); + common::ObNewRowIterator *table_iter = nullptr; + ObAccessService *tsc_service = MTL(ObAccessService *); + + if (OB_ISNULL(adapter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid adapter", K(ret), KPC(adapter)); + } else if (OB_FAIL(read_local_tablet(ls_id, + adapter, + target_scn, + index_type, + allocator, + scan_param, + table_param, + table_iter))) { + LOG_WARN("fail to read local tablet", KR(ret), K(ls_id), K(index_type), KPC(adapter)); + } else { + ObTableScanIterator *table_scan_iter = static_cast(table_iter); + bool read_finish = false; + int row_cnt = 0; + while(OB_SUCC(ret) && !read_finish) { + blocksstable::ObDatumRow* datum_row = nullptr; + if (OB_FAIL(table_scan_iter->get_next_row(datum_row))) { + if (OB_ITER_END == ret) { + LOG_INFO("dump local read finished", K(row_cnt), K(index_type)); + read_finish = true; + ret = OB_SUCCESS; + } else { + LOG_WARN("dump local read fail to get next row", KR(ret), K(row_cnt), K(index_type)); + } + } else if (FALSE_IT(row_cnt++)) { + } else if (OB_ISNULL(datum_row)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("dump local read row is null.", K(ret), K(index_type)); + } else { + // print for debug + LOG_INFO("dump local read row", K(row_cnt), K(index_type), KPC(datum_row)); + } + } + } + if (OB_NOT_NULL(table_iter) && 
OB_NOT_NULL(tsc_service)) { + int tmp_ret = tsc_service->revert_scan_iter(table_iter); + if (tmp_ret != OB_SUCCESS) { + LOG_WARN("revert test table_iter failed", K(ret)); + } + } + return ret; +} + +int ObPluginVectorIndexUtils::try_sync_vbitmap_memdata(ObLSID &ls_id, + ObPluginVectorIndexAdaptor *adapter, + SCN &target_scn, + ObIAllocator &allocator, + ObVectorQueryAdaptorResultContext &ada_ctx) +{ + int ret = OB_SUCCESS; + schema::ObIndexType index_type = INDEX_TYPE_VEC_INDEX_ID_LOCAL; + ObAccessService *tsc_service = MTL(ObAccessService *); + common::ObNewRowIterator *index_id_iter = nullptr; + storage::ObTableScanParam vbitmap_scan_param; + schema::ObTableParam vbitmap_table_param(allocator); + + if (OB_FAIL(read_local_tablet(ls_id, + adapter, + target_scn, + index_type, + allocator, + vbitmap_scan_param, + vbitmap_table_param, + index_id_iter))) { // read_local_tablet 4rd aux index get rowkey, backword + LOG_WARN("fail to read local tablet", KR(ret), K(ls_id), K(index_type), KPC(adapter)); + } else if (OB_FAIL(adapter->check_index_id_table_readnext_status(&ada_ctx, index_id_iter, target_scn))) { + LOG_WARN("fail to check and sync vbitmap.", KR(ret)); + } // ToDo: may also need to sync vector to incr memdata + + if (OB_NOT_NULL(index_id_iter) && OB_NOT_NULL(tsc_service)) { + int tmp_ret = tsc_service->revert_scan_iter(index_id_iter); + if (tmp_ret != OB_SUCCESS) { + LOG_WARN("revert index_id_iter failed", K(ret)); + } + index_id_iter = nullptr; + } + + return ret; +} + +int ObPluginVectorIndexUtils::try_sync_snapshot_memdata(ObLSID &ls_id, + ObPluginVectorIndexAdaptor *adapter, + SCN &target_scn, + ObIAllocator &allocator, + ObVectorQueryAdaptorResultContext &ada_ctx) +{ + int ret = OB_SUCCESS; + schema::ObIndexType index_type = INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL; + ObAccessService *tsc_service = MTL(ObAccessService *); + common::ObNewRowIterator *snapshot_idx_iter = nullptr; + storage::ObTableScanParam snapshot_scan_param; + schema::ObTableParam 
snapshot_table_param(allocator); + + if (OB_FAIL(read_local_tablet(ls_id, + adapter, + target_scn, + index_type, + allocator, + snapshot_scan_param, + snapshot_table_param, + snapshot_idx_iter))) { // read_local_tablet 5th aux index get rowkey + LOG_WARN("fail to read local tablet", KR(ret), K(ls_id), K(index_type), KPC(adapter)); + } else if (OB_FAIL(adapter->try_init_mem_data(VIRT_SNAP))) { + LOG_WARN("try init snap mem data failed.", K(ret)); + } else { + ObHNSWDeserializeCallback::CbParam param; + param.iter_ = snapshot_idx_iter; + param.allocator_ = &allocator; + + ObHNSWDeserializeCallback callback; + ObIStreamBuf::Callback cb = callback; + // ToDo: concurrency with weakread + ObVectorIndexSerializer index_seri(allocator); + ObVectorIndexMemData *snap_memdata = adapter->get_snap_data_(); + if (OB_FAIL(adapter->try_init_mem_data(VIRT_SNAP))) { + LOG_WARN("failed to init snapshot index.", K(ret)); + } else if (OB_ISNULL(snap_memdata)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid snap memdata", K(ret), KPC(adapter)); + } else if (OB_FAIL(index_seri.deserialize(snap_memdata->index_, param, cb))) { + LOG_WARN("serialize index failed.", K(ret)); + } else { + adapter->close_snap_data_rb_flag(); + } + } + + if (OB_NOT_NULL(snapshot_idx_iter) && OB_NOT_NULL(tsc_service)) { + int tmp_ret = tsc_service->revert_scan_iter(snapshot_idx_iter); + if (tmp_ret != OB_SUCCESS) { + LOG_WARN("revert snapshot_idx_iter failed", K(ret)); + } + snapshot_idx_iter = nullptr; + } + return ret; +} + +int ObPluginVectorIndexUtils::refresh_memdata(ObLSID &ls_id, + ObPluginVectorIndexAdaptor *adapter, + SCN target_scn, + ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + + // ToDo: remove test interface later +#if 0 + if (OB_FAIL(test_read_local_data(ls_id, adapter, INDEX_TYPE_VEC_ROWKEY_VID_LOCAL, target_scn, allocator))) { + LOG_WARN("fail to test read local data.", K(ret), K(ls_id), K(INDEX_TYPE_VEC_ROWKEY_VID_LOCAL)); + } else if (OB_FAIL(test_read_local_data(ls_id, adapter, 
INDEX_TYPE_VEC_VID_ROWKEY_LOCAL, target_scn, allocator))) { + LOG_WARN("fail to test read local data.", K(ret), K(ls_id), K(INDEX_TYPE_VEC_VID_ROWKEY_LOCAL)); + } else if (OB_FAIL(test_read_local_data(ls_id, adapter, INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL, target_scn, allocator))) { + LOG_WARN("fail to test read local data.", K(ret), K(ls_id), K(INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL)); + } else if (OB_FAIL(test_read_local_data(ls_id, adapter, INDEX_TYPE_VEC_INDEX_ID_LOCAL, target_scn, allocator))) { + LOG_WARN("fail to test read local data.", K(ret), K(ls_id), K(INDEX_TYPE_VEC_INDEX_ID_LOCAL)); + } else if (OB_FAIL(test_read_local_data(ls_id, adapter, INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL, target_scn, allocator))) { + LOG_WARN("fail to test read local data.", K(ret), K(ls_id), K(INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL)); + } else if (OB_FAIL(test_read_local_data(ls_id, adapter, INDEX_TYPE_IS_NOT, target_scn, allocator))) { + LOG_WARN("fail to test read local data.", K(ret), K(ls_id), K(INDEX_TYPE_IS_NOT)); + } +#endif + + common::ObNewRowIterator *delta_buf_iter = nullptr; + ObAccessService *tsc_service = MTL(ObAccessService *); + storage::ObTableScanParam inc_scan_param; + schema::ObTableParam inc_table_param(allocator); + ObArenaAllocator tmp_allocator("VectorAdaptor", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + + if (OB_ISNULL(adapter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid adapter", K(ret), KPC(adapter)); + } else if (OB_FAIL(read_local_tablet(ls_id, + adapter, + target_scn, + INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL, + allocator, + inc_scan_param, + inc_table_param, + delta_buf_iter))) { + LOG_WARN("fail to read local tablet", KR(ret), K(ls_id), K(INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL), KPC(adapter)); + } else { + ObVectorQueryAdaptorResultContext ada_ctx(&allocator, &tmp_allocator); + if (OB_FAIL(adapter->check_delta_buffer_table_readnext_status(&ada_ctx, delta_buf_iter, target_scn))) { + LOG_WARN("fail to check_delta_buffer_table_readnext_status.", K(ret)); + } 
else if (OB_FAIL(try_sync_vbitmap_memdata(ls_id, adapter, target_scn, allocator, ada_ctx))) { + LOG_WARN("failed to sync vbitmap", KR(ret)); + } else if (ada_ctx.get_status() == PVQ_COM_DATA) { + if (OB_FAIL(read_vector_info(adapter, allocator, ls_id, target_scn, ada_ctx))) { + LOG_WARN("failed to read vector_info", KR(ret)); + } else if (OB_FAIL(adapter->complete_delta_buffer_table_data(&ada_ctx))) { + LOG_WARN("failed to complete delta buffer", KR(ret)); + } else if (OB_FAIL(try_sync_snapshot_memdata(ls_id, adapter, target_scn, allocator, ada_ctx))) { + LOG_WARN("failed to refresh mem snapshots", KR(ret)); + } + } else if (ada_ctx.get_status() == PVQ_LACK_SCN) { + if (OB_FAIL(try_sync_snapshot_memdata(ls_id, adapter, target_scn, allocator, ada_ctx))) { + LOG_WARN("failed to refresh mem snapshots without refresh incr", KR(ret)); + } + } + } + + if (OB_NOT_NULL(delta_buf_iter) && OB_NOT_NULL(tsc_service)) { + int tmp_ret = tsc_service->revert_scan_iter(delta_buf_iter); + if (tmp_ret != OB_SUCCESS) { + LOG_WARN("revert delta_buf_iter failed", K(ret)); + } + delta_buf_iter = nullptr; + } + return ret; +} + +static bool is_non_shared_vec_index_aux_table(schema::ObIndexType type) +{ + bool bret = false; + bret = (is_vec_delta_buffer_type(type) + || is_vec_index_id_type(type) + || is_vec_index_snapshot_data_type(type)); + return bret; +} + +int ObPluginVectorIndexUtils::read_local_tablet(ObLSID &ls_id, + ObPluginVectorIndexAdaptor* adapter, + SCN target_scn, + schema::ObIndexType type, + ObIAllocator &allocator, + ObTableScanParam &scan_param, + ObTableParam &table_param, + common::ObNewRowIterator *&scan_iter) +{ + int ret = OB_SUCCESS; + ObAccessService *tsc_service = MTL(ObAccessService *); + scan_iter = nullptr; + + // init scan param refer to ObLocalIndexLookupOp::init_scan_param() + // assign ls_id, tablet_id, tx_snapshot + // set need_scn_ = true if need ora_rowscn + ObTabletID tablet_id; + uint64_t table_id = OB_INVALID_ID; + ObTabletHandle tablet_handle; + 
ObLSHandle ls_handle; + + // INDEX_TYPE_IS_NOT means data tablet + if (!ls_id.is_valid() || OB_ISNULL(adapter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid ls id or adapter", KR(ret), K(ls_id), KPC(adapter)); + } else if (is_vec_delta_buffer_type(type)) { + tablet_id = adapter->get_inc_tablet_id(); + table_id = adapter->get_inc_table_id(); + } else if (is_vec_index_id_type(type)) { + tablet_id = adapter->get_vbitmap_tablet_id(); + table_id = adapter->get_vbitmap_table_id(); + } else if (is_vec_index_snapshot_data_type(type)) { + tablet_id = adapter->get_snap_tablet_id(); + table_id = adapter->get_snapshot_table_id(); + } else if (is_vec_rowkey_vid_type(type)) { + tablet_id = adapter->get_rowkey_vid_tablet_id(); + table_id = adapter->get_rowkey_vid_table_id(); + } else if (is_vec_vid_rowkey_type(type)) { + tablet_id = adapter->get_vid_rowkey_tablet_id(); + table_id = adapter->get_vid_rowkey_table_id(); + } else if (type == INDEX_TYPE_IS_NOT) { + tablet_id = adapter->get_data_tablet_id(); + table_id = adapter->get_data_table_id(); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected index type", KR(ret), K(type)); + } + + if (OB_SUCC(ret)) { + LOG_INFO("read table tablet", K(ls_id), K(tablet_id), K(table_id), K(type), K(target_scn)); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::SHARE_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(ls_id)); + } else if (OB_ISNULL(ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("ls should not be null", K(ret)); + } else if (OB_FAIL(ls_handle.get_ls()->get_tablet_with_timeout(tablet_id, + tablet_handle, + 0, // timeout + ObMDSGetTabletMode::READ_READABLE_COMMITED, + target_scn))) { + LOG_WARN("fail to get tablet handle", KR(ret), K(tablet_id)); + } else { + uint64_t tenant_id = MTL_ID(); + scan_param.ls_id_ = ls_id; + scan_param.tablet_id_ = tablet_id; + scan_param.schema_version_ = 
tablet_handle.get_obj()->get_tablet_meta().max_sync_storage_schema_version_; + if (OB_FAIL(init_common_scan_param(scan_param, adapter, target_scn, &allocator, type, table_id))) { + LOG_WARN("fail to init common scan param", KR(ret), KPC(adapter)); + } else if (OB_FAIL(init_table_param(&table_param, + adapter->get_inc_table_id(), + adapter->get_data_table_id(), + table_id, + type, + adapter))) { + LOG_WARN("fail to init table param", KR(ret), KPC(adapter)); + } else if (FALSE_IT(scan_param.table_param_ = &table_param)) { + } else { + common::ObNewRange range; + void *buf = nullptr; + uint32_t col_cnt = 0; + if (is_non_shared_vec_index_aux_table(type)) { + if (OB_FAIL(get_non_shared_index_aux_table_rowkey_colum_count(type, col_cnt))) { + LOG_WARN("fail to get index aux table colum count", KR(ret), K(type)); + } else if (OB_ISNULL(buf = allocator.alloc(sizeof(ObObj) * col_cnt * 2))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("alloc scan range obj failed.", K(ret)); + } else { + ObObj *row_objs = reinterpret_cast(buf); + for (int i = 0; i < col_cnt; i++) { + row_objs[i] = ObObj::make_min_obj(); + } + ObRowkey min_row_key(row_objs, col_cnt); + for (int j = col_cnt; j < col_cnt * 2; j++) { + row_objs[j] = ObObj::make_max_obj(); + } + ObRowkey max_row_key(row_objs + col_cnt, col_cnt); + + range.table_id_ = table_id; + range.start_key_ = min_row_key; + range.end_key_ = max_row_key; + range.border_flag_.set_inclusive_start(); + range.border_flag_.set_inclusive_end(); + } + } else { + // vid_rowkey table or data table, get rowkey while complete + if (OB_FAIL(get_shared_table_rowkey_colum_count(type, table_id, col_cnt))) { + LOG_WARN("fail to get index aux table colum count", KR(ret), K(type)); + } else if (OB_ISNULL(buf = allocator.alloc(sizeof(ObObj) * col_cnt * 2))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("alloc scan range obj failed.", K(ret)); + } else { + ObObj *row_objs = reinterpret_cast(buf); + for (int i = 0; i < col_cnt; i++) { + row_objs[i] = ObObj::make_min_obj(); + 
} + ObRowkey min_row_key(row_objs, col_cnt); + for (int j = col_cnt; j < col_cnt * 2; j++) { + row_objs[j] = ObObj::make_max_obj(); + } + ObRowkey max_row_key(row_objs + col_cnt, col_cnt); + + range.table_id_ = table_id; + range.start_key_ = min_row_key; + range.end_key_ = max_row_key; + range.border_flag_.set_inclusive_start(); + range.border_flag_.set_inclusive_end(); + } + } + + // need lob helper for read aux table 5? + scan_param.key_ranges_.reset(); + if (OB_FAIL(ret)) { + } else if (OB_FAIL(scan_param.key_ranges_.push_back(range))) { + LOG_WARN("failed to push key range.", K(ret), K(scan_param), K(range)); + } else { + ObAccessService *oas = MTL(ObAccessService*); + if (OB_ISNULL(oas)) { + ret = OB_ERR_INTERVAL_INVALID; + LOG_WARN("get access service failed.", K(ret)); + } else if (OB_FAIL(oas->table_scan(scan_param, scan_iter))) { + LOG_WARN("do table scan falied.", K(ret), K(scan_param)); + } + } + + if (OB_NOT_NULL(buf)) { + allocator.free(buf); + } + } + } + + return ret; +} + +int ObPluginVectorIndexUtils::init_common_scan_param(storage::ObTableScanParam& scan_param, + ObPluginVectorIndexAdaptor *adapter, + SCN target_scn, + ObIAllocator *allocator, + ObIndexType type, + uint64_t table_id) +{ + // fix validate adapter & allocator + // refer to ObPersistentLobApator::build_common_scan_param + int ret = OB_SUCCESS; + ObQueryFlag query_flag(is_vec_index_id_type(type) ? ObQueryFlag::Reverse : ObQueryFlag::Forward, // scan_order + false, // daily_merge + false, // optimize + false, // sys scan + true, // full_row + false, // index_back + false, // query_stat + ObQueryFlag::MysqlMode, // sql_mode + false // read_latest + ); + query_flag.disable_cache(); + query_flag.scan_order_ = is_vec_index_id_type(type) ? 
ObQueryFlag::Reverse : ObQueryFlag::Forward; + scan_param.scan_flag_.flag_ = query_flag.flag_; + // set column ids + scan_param.column_ids_.reset(); + uint32 col_cnt = 0; + + if (is_vec_index(type) || type == INDEX_TYPE_IS_NOT){ + if (OB_FAIL(get_special_index_aux_table_column_count(type, table_id, col_cnt, scan_param))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected index type", KR(ret), K(type)); + } + } + + if (OB_SUCC(ret)) { + scan_param.reserved_cell_count_ = scan_param.column_ids_.count(); + // table param + scan_param.index_id_ = 0; + scan_param.is_get_ = false; + // set timeout + scan_param.timeout_ = INT64_MAX; + // scan_param.virtual_column_exprs_ + scan_param.limit_param_.limit_ = -1; + scan_param.limit_param_.offset_ = 0; + // sessions + + scan_param.snapshot_.init_weak_read(target_scn); + + // never read_latest + // if(param.read_latest_) { + // scan_param.tx_id_ = param.snapshot_.core_.tx_id_; + // } + scan_param.sql_mode_ = SMO_DEFAULT; + // common set + scan_param.allocator_ = allocator; + scan_param.for_update_ = false; + scan_param.for_update_wait_timeout_ = scan_param.timeout_; + scan_param.scan_allocator_ = allocator; + scan_param.frozen_version_ = -1; + scan_param.force_refresh_lc_ = false; + scan_param.output_exprs_ = nullptr; + scan_param.aggregate_exprs_ = nullptr; + scan_param.op_ = nullptr; + scan_param.row2exprs_projector_ = nullptr; + scan_param.need_scn_ = false; + scan_param.pd_storage_flag_ = false; + // not flashback + // scan_param.fb_snapshot_ = param.fb_snapshot_; + } + return ret; +} + +int ObPluginVectorIndexUtils::init_table_param(ObTableParam *table_param, + uint64_t inc_table_id, + uint64_t data_table_id, + uint64_t table_id, + schema::ObIndexType type, + ObPluginVectorIndexAdaptor *adapter) +{ + int ret = OB_SUCCESS; + int64_t schema_version = OB_INVALID_VERSION; + ObSchemaGetterGuard schema_guard; + const ObTableSchema *table_schema = NULL; + ObSEArray column_ids; + if 
(OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard(MTL_ID(), schema_guard))) { + LOG_WARN("fail to get schema guard", KR(ret), K(MTL_ID())); + } else if (OB_FAIL(schema_guard.get_table_schema(MTL_ID(), table_id, table_schema))) { + LOG_WARN("fail to get schema", KR(ret), KR(table_id)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_TABLE_NOT_EXIST; // table may be removed, handle in scheduler routine + LOG_WARN("get null table schema", KR(ret), KR(table_id)); + } else if (is_vec_delta_buffer_type(type)) { + ObArray tmp_column_ids; + const ObTableSchema *data_table_schema = NULL; + if (OB_FAIL(schema_guard.get_table_schema(MTL_ID(), data_table_id, data_table_schema))) { + LOG_WARN("fail to get schema", KR(ret), KR(data_table_id)); + } else if (OB_ISNULL(table_schema) || OB_ISNULL(data_table_schema)) { + ret = OB_TABLE_NOT_EXIST; // table may be removed, handle in scheduler routine + LOG_WARN("get null table schema", KR(ret), K(table_id), K(data_table_id)); + } else if (OB_FAIL(table_schema->get_column_ids(tmp_column_ids))) { + LOG_ERROR("fail to get index table all column ids", K(table_schema), KPC(adapter)); + } else if (OB_FAIL(table_schema->get_column_ids(tmp_column_ids))) { + LOG_ERROR("fail to get index table all column ids", K(table_schema), KPC(adapter)); + } else if (tmp_column_ids.count() < 3) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected column count", K(tmp_column_ids.count())); + } else { + // need [vid][type][vector] + uint64_t vid_column_id = 0; + uint64_t type_column_id = 0; + uint64_t vector_column_id = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < tmp_column_ids.count(); ++i) { + const ObColumnSchemaV2 *col_schema = data_table_schema->get_column_schema(tmp_column_ids[i]); + if (OB_ISNULL(col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else if (col_schema->is_vec_vid_column()) { + vid_column_id = col_schema->get_column_id(); + } else if 
(col_schema->is_vec_type_column()) { + type_column_id = col_schema->get_column_id(); + } else if (col_schema->is_vec_vector_column()) { + vector_column_id = col_schema->get_column_id(); + } + } + if (OB_FAIL(ret)) { + } else if (vid_column_id == 0 || type_column_id == 0 || vector_column_id == 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get valid column id", K(ret), K(vid_column_id), K(type_column_id), K(vector_column_id)); + } else if (OB_FAIL(column_ids.push_back(vid_column_id))) { + LOG_WARN("failed to push 2nd column id.", K(ret)); + } else if (OB_FAIL(column_ids.push_back(type_column_id))) { + LOG_WARN("failed to push 3rd column id.", K(ret)); + } else if (OB_FAIL(column_ids.push_back(vector_column_id))) { + LOG_WARN("failed to push 4th column id.", K(ret)); + } else if (OB_FAIL(table_param->convert(*table_schema, column_ids, sql::ObStoragePushdownFlag()))) { + LOG_ERROR("fail to convert table param", KR(ret), K(table_schema), K(type)); + } + } + } else if (is_vec_index_id_type(type)) { + // different with other index, refer to ObTscCgService::extract_vec_ir_access_columns + ObArray tmp_column_ids; + const ObTableSchema *data_table_schema = NULL; + if (OB_FAIL(schema_guard.get_table_schema(MTL_ID(), data_table_id, data_table_schema))) { + LOG_WARN("fail to get schema", KR(ret), KR(data_table_id)); + } else if (OB_ISNULL(table_schema) || OB_ISNULL(data_table_schema)) { + ret = OB_TABLE_NOT_EXIST; // table may be removed, handle in scheduler routine + LOG_WARN("get null table schema", KR(ret), K(table_id), K(data_table_id)); + } else if (OB_FAIL(table_schema->get_column_ids(tmp_column_ids))) { + LOG_ERROR("fail to get index table all column ids", K(table_schema), KPC(adapter)); + } else if (OB_FAIL(table_schema->get_column_ids(tmp_column_ids))) { + LOG_ERROR("fail to get index table all column ids", K(table_schema), KPC(adapter)); + } else if (tmp_column_ids.count() < 4) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected column count", 
K(tmp_column_ids.count())); + } else { + // need [scn][vid][type][vector] + uint64_t scn_column_id = 0; + uint64_t vid_column_id = 0; + uint64_t type_column_id = 0; + uint64_t vector_column_id = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < tmp_column_ids.count(); ++i) { + const ObColumnSchemaV2 *col_schema = data_table_schema->get_column_schema(tmp_column_ids[i]); + if (OB_ISNULL(col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else if (col_schema->is_vec_scn_column()) { + scn_column_id = col_schema->get_column_id(); + } else if (col_schema->is_vec_vid_column()) { + vid_column_id = col_schema->get_column_id(); + } else if (col_schema->is_vec_type_column()) { + type_column_id = col_schema->get_column_id(); + } else if (col_schema->is_vec_vector_column()) { + vector_column_id = col_schema->get_column_id(); + } + } + if (OB_FAIL(ret)) { + } else if (scn_column_id == 0 || vid_column_id == 0 || type_column_id == 0 || vector_column_id == 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get valid column id", K(ret), K(scn_column_id), K(vid_column_id), K(type_column_id), K(vector_column_id)); + } else if (OB_FAIL(column_ids.push_back(scn_column_id))) { + LOG_WARN("failed to push 1st column id.", K(ret)); + } else if (OB_FAIL(column_ids.push_back(vid_column_id))) { + LOG_WARN("failed to push 2nd column id.", K(ret)); + } else if (OB_FAIL(column_ids.push_back(type_column_id))) { + LOG_WARN("failed to push 3rd column id.", K(ret)); + } else if (OB_FAIL(column_ids.push_back(vector_column_id))) { + LOG_WARN("failed to push 4th column id.", K(ret)); + } else if (OB_FAIL(table_param->convert(*table_schema, column_ids, sql::ObStoragePushdownFlag()))) { + LOG_ERROR("fail to convert table param", KR(ret), K(table_schema), K(type)); + } + } + } else if (is_vec_vid_rowkey_type(type)) { + uint64_t vid_column_id = 0; + ObSEArray tmp_column_ids; + if (OB_FAIL(table_schema->get_column_ids(tmp_column_ids))) { + LOG_ERROR("fail to get 
index table all column ids", K(table_schema), KPC(adapter)); + } else { + // make sure vid column is the first output column + for (int64_t i = 0; OB_SUCC(ret) && i < table_schema->get_column_count() && vid_column_id == 0; ++i) { + const ObColumnSchemaV2 *col_schema = table_schema->get_column_schema_by_idx(i); + if (OB_ISNULL(col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else if (col_schema->is_vec_vid_column()) { + vid_column_id = col_schema->get_column_id(); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(column_ids.push_back(vid_column_id))) { + LOG_WARN("failed to push 1st column id.", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < tmp_column_ids.count(); ++i) { + if (tmp_column_ids[i] != vid_column_id) { + if (OB_FAIL(column_ids.push_back(tmp_column_ids[i]))) { + LOG_WARN("failed to push column id.", K(ret), K(i), K(tmp_column_ids[i]), K(vid_column_id)); + } + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(table_param->convert(*table_schema, column_ids, sql::ObStoragePushdownFlag()))) { + LOG_ERROR("fail to convert table param", KR(ret), K(table_schema), K(type)); + } + } + } else if (is_vec_index_snapshot_data_type(type)) { + const ObTableSchema *data_table_schema = NULL; + ObSEArray tmp_column_ids; + if (OB_FAIL(schema_guard.get_table_schema(MTL_ID(), data_table_id, data_table_schema))) { + LOG_WARN("fail to get schema", KR(ret), KR(data_table_id)); + } else if (OB_ISNULL(table_schema) || OB_ISNULL(data_table_schema)) { + ret = OB_TABLE_NOT_EXIST; // table may be removed, handle in scheduler routine + LOG_WARN("get null table schema", KR(ret), K(table_id), K(data_table_id)); + } else if (OB_FAIL(table_schema->get_column_ids(tmp_column_ids))) { + LOG_ERROR("fail to get index table all column ids", K(table_schema), KPC(adapter)); + } else { + uint64_t key_column_id = 0; + uint64_t lob_data_column_id = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < tmp_column_ids.count(); ++i) { + const 
ObColumnSchemaV2 *col_schema = data_table_schema->get_column_schema(tmp_column_ids[i]); + if (OB_ISNULL(col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else if (col_schema->is_vec_key_column()) { + key_column_id = col_schema->get_column_id(); + } else if (col_schema->is_vec_data_column()) { + lob_data_column_id = col_schema->get_column_id(); + } + } + if (OB_FAIL(ret)) { + } else if (key_column_id == 0 || lob_data_column_id == 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected snapshot data column ids", K(key_column_id), K(lob_data_column_id)); + } else if (OB_FAIL(column_ids.push_back(key_column_id))) { + LOG_WARN("failed to push column id.", K(ret), K(key_column_id)); + } else if (OB_FAIL(column_ids.push_back(lob_data_column_id))) { + LOG_WARN("failed to push column id.", K(ret), K(lob_data_column_id)); + } else { + table_param->get_enable_lob_locator_v2() = true; + table_param->set_is_vec_index(true); + if (OB_FAIL(table_param->convert(*table_schema, column_ids, sql::ObStoragePushdownFlag()))) { + LOG_ERROR("fail to convert table param", KR(ret), K(table_schema), K(type)); + } + } + } + } else if (is_vec_index(type)) { + if (OB_FAIL(table_schema->get_column_ids(column_ids))) { + LOG_ERROR("fail to get index table all column ids", K(table_schema), KPC(adapter)); + } else { + if (OB_FAIL(table_param->convert(*table_schema, column_ids, sql::ObStoragePushdownFlag()))) { + LOG_ERROR("fail to convert table param", KR(ret), K(table_schema), K(type)); + } + } + } else if (type == INDEX_TYPE_IS_NOT) { + if (OB_FAIL(get_vec_column_id(column_ids, inc_table_id, table_id))) { + LOG_WARN("failed to get vec column id.", K(ret)); + } else { + if (OB_FAIL(table_param->convert(*table_schema, column_ids, sql::ObStoragePushdownFlag()))) { + LOG_WARN("failed to convert table param.", K(ret)); + } + } + } + return ret; +} + +int ObPluginVectorIndexUtils::get_non_shared_index_aux_table_colum_count(schema::ObIndexType type, 
uint32 &col_cnt)
+{
+  // Column count scanned from each non-shared vector-index aux table. col_cnt is zeroed first,
+  // so an unsupported type returns OB_ERR_UNEXPECTED with col_cnt == 0.
+  static const uint32 delta_buffer_tab_col_cnt = 3; // vid, type, vector, "ora_rowscn", 3 or 4 columns
+  static const uint32 index_id_tab_col_cnt = 4; // scn ,vid, type, vector
+  static const uint32 index_snapshot_tab_col_cnt = 2; // key, data
+
+  int ret = OB_SUCCESS;
+  col_cnt = 0;
+  if (is_vec_delta_buffer_type(type)) {
+    col_cnt = delta_buffer_tab_col_cnt;
+  } else if (is_vec_index_id_type(type)) {
+    col_cnt = index_id_tab_col_cnt;
+  } else if (is_vec_index_snapshot_data_type(type)) {
+    col_cnt = index_snapshot_tab_col_cnt;
+  } else {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected index type", KR(ret), K(type));
+  }
+  if (OB_SUCC(ret)) {
+    LOG_INFO("get_non_shared_index_aux_table_colum_count", K(type), K(col_cnt)); // remove after debug;
+  }
+  return ret;
+}
+
+// Looks table_id up in the current tenant's schema, copies all of its column ids into
+// scan_param.column_ids_ and reports how many there are through col_cnt.
+// `type` is only used for logging. Returns OB_TABLE_NOT_EXIST when the schema is gone.
+// NOTE(review): `ObSEArray column_ids` has no template arguments in this copy of the patch;
+// they look stripped in transit and must be restored from the upstream source.
+int ObPluginVectorIndexUtils::get_special_index_aux_table_column_count(
+    schema::ObIndexType type,
+    uint64_t table_id,
+    uint32 &col_cnt,
+    storage::ObTableScanParam& scan_param)
+{
+  int ret = OB_SUCCESS;
+  ObSchemaGetterGuard schema_guard;
+  const ObTableSchema *table_schema = NULL;
+  ObSEArray column_ids;
+  if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard(MTL_ID(), schema_guard))) {
+    LOG_WARN("fail to get schema guard", KR(ret), K(MTL_ID()));
+  } else if (OB_FAIL(schema_guard.get_table_schema(MTL_ID(), table_id, table_schema))) {
+    LOG_WARN("fail to get schema", KR(ret), K(table_id));
+  } else if (OB_ISNULL(table_schema)) {
+    ret = OB_TABLE_NOT_EXIST; // table may be removed, handle in scheduler routine
+    LOG_WARN("get null table schema", KR(ret), K(table_id));
+  } else if (OB_FAIL(table_schema->get_column_ids(column_ids))) {
+    LOG_ERROR("fail to get index table all column ids", K(table_schema));
+  } else if (OB_FAIL(scan_param.column_ids_.assign(column_ids))) {
+    LOG_WARN("failed to assign column ids.", K(ret));
+  } else {
+    col_cnt = column_ids.count();
+  }
+  if (OB_SUCC(ret)) {
+    LOG_INFO("get_special_index_aux_table_column_count", K(type), K(col_cnt), K(column_ids)); // remove after debug;
+  }
+  return ret;
+}
+
+// Rowkey column count of each non-shared vector-index aux table
+// (delta buffer: 2, index id: 3, snapshot data: 1). Unsupported types fail with
+// OB_ERR_UNEXPECTED and leave col_cnt at 0.
+int ObPluginVectorIndexUtils::get_non_shared_index_aux_table_rowkey_colum_count(schema::ObIndexType type, uint32 &col_cnt)
+{
+  // only need to do range scan for aux index table 3, 4, 5
+  // other tables only needs multi get
+  static const uint32 delta_buffer_tab_col_cnt = 2; // rowkey:vid, type, other:vector, "ora_rowscn"
+  static const uint32 index_id_tab_col_cnt = 3; // rowkey:scn ,vid, type, other:vector
+  static const uint32 index_snapshot_tab_col_cnt = 1; // rowkey:key, other:data
+
+  int ret = OB_SUCCESS;
+  col_cnt = 0;
+  if (is_vec_delta_buffer_type(type)) {
+    col_cnt = delta_buffer_tab_col_cnt;
+  } else if (is_vec_index_id_type(type)) {
+    col_cnt = index_id_tab_col_cnt;
+  } else if (is_vec_index_snapshot_data_type(type)) {
+    col_cnt = index_snapshot_tab_col_cnt;
+  } else {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected index type", KR(ret), K(type));
+  }
+  if (OB_SUCC(ret)) {
+    LOG_INFO("get_non_shared_index_aux_table_rowkey_colum_count", K(type), K(col_cnt)); // remove after debug;
+  }
+  return ret;
+}
+
+// Reads the rowkey column ids of table_id from the current tenant's schema and reports their
+// count through col_cnt. `type` is only used for logging.
+// Returns OB_TABLE_NOT_EXIST when the schema is gone.
+int ObPluginVectorIndexUtils::get_shared_table_rowkey_colum_count(schema::ObIndexType type,
+                                                                  uint64_t table_id,
+                                                                  uint32 &col_cnt)
+{
+  int ret = OB_SUCCESS;
+  ObSchemaGetterGuard schema_guard;
+  const ObTableSchema *table_schema = NULL;
+  ObSEArray column_ids;
+  if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard(MTL_ID(), schema_guard))) {
+    LOG_WARN("fail to get schema guard", KR(ret), K(MTL_ID()));
+  } else if (OB_FAIL(schema_guard.get_table_schema(MTL_ID(), table_id, table_schema))) {
+    // table_id is not a return code: log it with K, as get_special_index_aux_table_column_count does
+    LOG_WARN("fail to get schema", KR(ret), K(table_id));
+  } else if (OB_ISNULL(table_schema)) {
+    ret = OB_TABLE_NOT_EXIST; // table may be removed, handle in scheduler routine
+    LOG_WARN("get null table schema", KR(ret), K(table_id));
+  } else {
+    const ObRowkeyInfo &rowkey_info = table_schema->get_rowkey_info();
+    if (OB_FAIL(rowkey_info.get_column_ids(column_ids))) {
+      LOG_WARN("get rowkey_info from table schema faild", KR(ret), K(table_id), KPC(table_schema));
+    } else {
+      col_cnt = column_ids.count();
+    }
+  }
+  if (OB_SUCC(ret)) {
+    // tag the trace with this function's own name (it used to carry the non-shared variant's name)
+    LOG_INFO("get_shared_table_rowkey_colum_count", K(type), K(col_cnt), K(column_ids)); // remove after debug;
+  }
+  return ret;
+}
+
+// Drops one reference on adapter. When dec_ref_and_check_release() reports that the adapter
+// should be released, it is destroyed and freed through its own allocator and the caller's
+// pointer is reset to nullptr. A null adapter is a no-op.
+// Returns OB_ERR_UNEXPECTED when the adapter has no allocator to free it with.
+int ObPluginVectorIndexUtils::release_vector_index_adapter(ObPluginVectorIndexAdaptor* &adapter)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(adapter)) {
+    // do nothing
+  } else {
+    if (adapter->dec_ref_and_check_release()) {
+      ObIAllocator *allocator = adapter->get_allocator();
+      if (OB_ISNULL(allocator)) {
+        // Assign the function's own ret: the previous `const int ret` declared a shadowing
+        // local, so this failure was logged but the caller still received OB_SUCCESS.
+        ret = OB_ERR_UNEXPECTED;
+        OB_LOG(WARN, "release vector index adapter failed", K(ret), KPC(adapter));
+      } else {
+        // OB_LOG(DEBUG, "adatper released", KPC(adapter));
+        adapter->~ObPluginVectorIndexAdaptor();
+        allocator->free(adapter);
+      }
+      adapter = nullptr;
+    }
+  }
+  return ret;
+}
+
+// Maps an aux-table index type to its record type:
+// delta buffer -> VIRT_INC, index id -> VIRT_BITMAP, snapshot data -> VIRT_SNAP, else VIRT_MAX.
+ObVectorIndexRecordType ObPluginVectorIndexUtils::index_type_to_record_type(schema::ObIndexType type)
+{
+  ObVectorIndexRecordType record_type = VIRT_MAX;
+  if (schema::is_vec_delta_buffer_type(type)) {
+    record_type = VIRT_INC;
+  } else if (schema::is_vec_index_id_type(type)) {
+    record_type = VIRT_BITMAP;
+  } else if (schema::is_vec_index_snapshot_data_type(type)) {
+    record_type = VIRT_SNAP;
+  }
+  return record_type;
+}
+
+// Maps an aux-table index type to the adapter create type:
+// delta buffer -> CreateTypeInc, index id -> CreateTypeBitMap, snapshot data -> CreateTypeSnap,
+// else CreateTypeMax.
+ObAdapterCreateType ObPluginVectorIndexUtils::index_type_to_create_type(schema::ObIndexType type)
+{
+  ObAdapterCreateType create_type = CreateTypeMax;
+  if (schema::is_vec_delta_buffer_type(type)) {
+    create_type = CreateTypeInc;
+  } else if (schema::is_vec_index_id_type(type)) {
+    create_type = CreateTypeBitMap;
+  } else if (schema::is_vec_index_snapshot_data_type(type)) {
+    create_type = CreateTypeSnap;
+  }
+  return create_type;
+}
+
+// Strips the aux-table suffix from the name of a non-shared vector index table
+// (delta buffer / index id / snapshot data). prefix is reset first and, on success, set with
+// assign_ptr to the leading part of the schema's table name.
+// Returns OB_ERR_UNEXPECTED for non-vector tables, for the shared rowkey_vid / vid_rowkey
+// tables, and when the table name is shorter than the suffix it should end with.
+int ObPluginVectorIndexUtils::get_vector_index_prefix(const ObTableSchema &index_schema,
+                                                      ObString &prefix)
+{
+  int ret = OB_SUCCESS;
+  prefix.reset();
+  if (!index_schema.is_vec_index()) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected, not vector index table", K(ret), K(index_schema));
+  } else if (index_schema.is_vec_rowkey_vid_type() || index_schema.is_vec_vid_rowkey_type()) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected vector index type, only support get none share table prefix",
+             K(ret), K(index_schema));
+  } else {
+    ObString tmp_table_name = index_schema.get_table_name();
+    const int64_t table_name_len = tmp_table_name.length();
+
+    const char* delta_buffer_table = ObVecIndexBuilderUtil::DELTA_BUFFER_TABLE_NAME_SUFFIX;
+    const char* index_id_table = ObVecIndexBuilderUtil::INDEX_ID_TABLE_NAME_SUFFIX;
+    const char* index_snapshot_data_table = ObVecIndexBuilderUtil::SNAPSHOT_DATA_TABLE_NAME_SUFFIX;
+    int64_t prefix_len = 0;
+
+    // strlen() is unsigned; cast before subtracting so a too-short name yields a negative
+    // length that the check below can reject.
+    if (index_schema.is_vec_delta_buffer_type()) {
+      prefix_len = table_name_len - static_cast<int64_t>(strlen(delta_buffer_table));
+    } else if (index_schema.is_vec_index_id_type()) {
+      prefix_len = table_name_len - static_cast<int64_t>(strlen(index_id_table));
+    } else if (index_schema.is_vec_index_snapshot_data_type()) {
+      prefix_len = table_name_len - static_cast<int64_t>(strlen(index_snapshot_data_table));
+    } else {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected vector index type", K(ret), K(index_schema));
+    }
+    if (OB_SUCC(ret) && prefix_len < 0) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("table name is shorter than vector index suffix", K(ret), K(prefix_len), K(tmp_table_name));
+    }
+    if (OB_SUCC(ret)) {
+      prefix.assign_ptr(tmp_table_name.ptr(), prefix_len);
+      LOG_INFO("get_index_prefix", K(prefix), K(tmp_table_name));
+    }
+  }
+  return ret;
+}
+
+} // namespace share
+} // namespace oceanbase
diff --git a/src/share/vector_index/ob_plugin_vector_index_utils.h b/src/share/vector_index/ob_plugin_vector_index_utils.h
new file mode 100644
index 0000000000..6bbd083eaf
--- /dev/null
+++ b/src/share/vector_index/ob_plugin_vector_index_utils.h
@@ -0,0 +1,145 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+#ifndef OCEANBASE_OBSERVER_OB_MOCK_PLUGIN_VECTOR_INDEX_UTILS_DEFINE_H_
+#define OCEANBASE_OBSERVER_OB_MOCK_PLUGIN_VECTOR_INDEX_UTILS_DEFINE_H_
+#include "share/ob_ls_id.h"
+#include "share/scn.h"
+#include "share/rc/ob_tenant_base.h"
+#include "share/vector_index/ob_plugin_vector_index_adaptor.h"
+#include "share/schema/ob_schema_struct.h"
+#include "storage/access/ob_table_access_param.h"
+#include "storage/access/ob_dml_param.h"
+#include "storage/tx_storage/ob_access_service.h"
+#include "storage/access/ob_table_scan_iterator.h"
+#include "common/rowkey/ob_rowkey.h"
+#include "src/share/schema/ob_tenant_schema_service.h"
+#include "lib/vector/ob_vector_util.h"
+
+namespace oceanbase
+{
+namespace share
+{
+
+// Holder for the single obvectorutil::ObVsagLogger used by the vector index code.
+// It cannot be instantiated or copied (private constructor, deleted copy operations);
+// getInstance() hands out a reference to one function-local static logger.
+class ObVsagLoggerSingleton
+{
+private:
+  ObVsagLoggerSingleton() {}
+
+public:
+  ObVsagLoggerSingleton(const ObVsagLoggerSingleton&) = delete;
+  ObVsagLoggerSingleton& operator=(const ObVsagLoggerSingleton&) = delete;
+
+  // Returns the logger instance, constructed on first use.
+  static obvectorutil::ObVsagLogger &getInstance() {
+    static obvectorutil::ObVsagLogger instance;
+    return instance;
+  }
+
+};
+
+// Static-only helper collection for plugin vector indexes: adapter release, index-type
+// mapping, aux-table name handling and scan-parameter setup.
+// NOTE(review): several parameter types below (e.g. `ObSEArray &vector_column_ids`) have no
+// template arguments in this copy of the patch; they look stripped in transit — restore them
+// from the upstream header.
+class ObPluginVectorIndexUtils
+{
+public:
+  static int get_task_read_snapshot(ObLSID &ls_id, SCN &read_version);
+  static int refresh_memdata(ObLSID &ls_id,
+                             ObPluginVectorIndexAdaptor *adapter,
+                             SCN target_scn,
+                             ObIAllocator &allocator);
+  // Drops one reference on adapter; once the adapter reports it should be released it is
+  // destroyed, freed through its own allocator, and the caller's pointer is set to nullptr.
+  static int release_vector_index_adapter(ObPluginVectorIndexAdaptor* &adapter);
+
+  // delta buffer -> VIRT_INC, index id -> VIRT_BITMAP, snapshot data -> VIRT_SNAP, else VIRT_MAX.
+  static ObVectorIndexRecordType index_type_to_record_type(schema::ObIndexType type);
+
+  // delta buffer -> CreateTypeInc, index id -> CreateTypeBitMap, snapshot data -> CreateTypeSnap,
+  // else CreateTypeMax.
+  static ObAdapterCreateType index_type_to_create_type(schema::ObIndexType type);
+
+  // Strips the aux-table suffix from a non-shared vector index table name. prefix is set with
+  // assign_ptr onto the table name's buffer, so presumably it does not own the bytes — confirm.
+  static int get_vector_index_prefix(const ObTableSchema &index_schema, ObString &prefix);
+  // Registers the singleton logger through obvectorutil::init_vasg_logger and returns its result.
+  static int set_vsag_logger() {
+    return obvectorutil::init_vasg_logger(&ObVsagLoggerSingleton::getInstance());
+  }
+
+  static int add_key_ranges(uint64_t table_id, ObRowkey& rowkey, storage::ObTableScanParam &scan_param);
+  static int iter_table_rescan(storage::ObTableScanParam &scan_param, common::ObNewRowIterator *iter);
+
+  static int read_object_from_vid_rowkey_table_iter(ObObj *input_obj,
+                                                    uint64_t table_id,
+                                                    storage::ObTableScanParam &scan_param,
+                                                    common::ObNewRowIterator *iter,
+                                                    schema::ObIndexType type,
+                                                    ObIAllocator &allocator,
+                                                    ObObj *&output_obj,
+                                                    int32_t data_table_rowkey_count);
+  static int read_object_from_data_table_iter(ObObj *&input_obj,
+                                              int32_t data_table_rowkey_count,
+                                              uint64_t table_id,
+                                              storage::ObTableScanParam &scan_param,
+                                              common::ObNewRowIterator *iter,
+                                              schema::ObIndexType type,
+                                              ObIAllocator &allocator,
+                                              ObObj &output_obj,
+                                              bool &get_data);
+  static int get_vec_column_id (ObSEArray &vector_column_ids,
+                                uint64_t incr_index_table_id,
+                                uint64_t data_table_id);
+  static int read_vector_info(ObPluginVectorIndexAdaptor *adapter,
+                              ObIAllocator &allocator,
+                              ObLSID &ls_id,
+                              SCN target_scn,
+                              ObVectorQueryAdaptorResultContext &ada_ctx);
+
+  static int test_read_local_data(ObLSID &ls_id,
+                                  ObPluginVectorIndexAdaptor *adapter,
+                                  ObIndexType index_type,
+                                  SCN target_scn,
+                                  ObIAllocator &allocator);
+
+private:
+  static int read_local_tablet(ObLSID &ls_id,
+                               ObPluginVectorIndexAdaptor* adapter,
+                               SCN target_scn,
+                               schema::ObIndexType type,
+                               ObIAllocator &allocator,
+                               ObTableScanParam &scan_param,
+                               ObTableParam &table_param,
+                               common::ObNewRowIterator *&scan_iter);
+  // Fills the scan flags (reverse order for the index-id table, forward otherwise), the
+  // column ids for vector-index / INDEX_TYPE_IS_NOT tables, a weak-read snapshot at target_scn
+  // and the allocators. The definition in the .cpp does not read `adapter`.
+  static int init_common_scan_param(storage::ObTableScanParam& scan_param,
+                                    ObPluginVectorIndexAdaptor *adapter,
+                                    SCN target_scn,
+                                    ObIAllocator *allocator,
+                                    schema::ObIndexType type,
+                                    uint64_t table_id);
+  // Picks the output column ids for table_id according to `type` and converts them into
+  // *table_param; returns OB_TABLE_NOT_EXIST when a needed schema is missing.
+  static int init_table_param(ObTableParam *table_param,
+                              uint64_t inc_table_id,
+                              uint64_t data_table_id,
+                              uint64_t table_id,
+                              schema::ObIndexType type,
+                              ObPluginVectorIndexAdaptor *adapter);
+  // Fixed column count per non-shared aux table: delta buffer 3, index id 4, snapshot data 2.
+  static int get_non_shared_index_aux_table_colum_count(schema::ObIndexType type, uint32 &col_cnt);
+  // Fixed rowkey column count per non-shared aux table: delta buffer 2, index id 3, snapshot data 1.
+  static int get_non_shared_index_aux_table_rowkey_colum_count(schema::ObIndexType type, uint32 &col_cnt);
+  // Copies every column id of table_id into scan_param.column_ids_ and reports the count.
+  static int get_special_index_aux_table_column_count(
+      schema::ObIndexType type,
+      uint64_t table_id,
+      uint32 &col_cnt,
+      storage::ObTableScanParam& scan_param);
+  // Reports the number of rowkey columns of table_id.
+  static int get_shared_table_rowkey_colum_count(schema::ObIndexType type, uint64_t table_id, uint32 &col_cnt);
+  static int try_sync_snapshot_memdata(ObLSID &ls_id,
+                                       ObPluginVectorIndexAdaptor *adapter,
+                                       SCN &target_scn,
+                                       ObIAllocator &allocator,
+                                       ObVectorQueryAdaptorResultContext &ada_ctx);
+  static int try_sync_vbitmap_memdata(ObLSID &ls_id,
+                                      ObPluginVectorIndexAdaptor *adapter,
+                                      SCN &target_scn,
+                                      ObIAllocator &allocator,
+                                      ObVectorQueryAdaptorResultContext &ada_ctx);
+};
+
+} // namespace share
+} // namespace oceanbase
+#endif
\ No newline at end of file
diff --git a/src/share/vector_index/ob_vector_index_util.cpp b/src/share/vector_index/ob_vector_index_util.cpp
new file mode 100644
index 0000000000..eee2713322
--- /dev/null
+++ b/src/share/vector_index/ob_vector_index_util.cpp
@@ -0,0 +1,835 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX SHARE
+
+#include "ob_vector_index_util.h"
+
+namespace oceanbase
+{
+namespace share
+{
+/*
+  Parses a vector index parameter string of the form "NAME=VALUE,NAME=VALUE,..." into param.
+  index_param_str is expected to be an upper-case string.
+
+  Recognised names: DISTANCE (INNER_PRODUCT | L2), LIB (VSAG), TYPE (HNSW), M (5..64),
+  EF_CONSTRUCTION (5..1000), EF_SEARCH (1..1000). param is reset first; M, EF_CONSTRUCTION,
+  EF_SEARCH and LIB fall back to defaults when they are still unset after parsing.
+
+  Returns OB_ERR_UNEXPECTED for an empty string, fewer than two pairs, a malformed pair or an
+  unknown name, and OB_NOT_SUPPORTED for an unsupported or out-of-range value.
+
+  NOTE(review): the bare `ObArray` declarations below have no template arguments in this copy
+  of the patch; they look stripped in transit and must be restored from the upstream source.
+*/
+int ObVectorIndexUtil::parser_params_from_string(
+    const ObString &index_param_str, ObVectorIndexHNSWParam &param)
+{
+  int ret = OB_SUCCESS;
+  ObString tmp_param_str = index_param_str;
+  ObArray tmp_param_strs;
+  param.reset();
+  if (tmp_param_str.empty()) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected vector index param, is empty", K(ret));
+  } else if (OB_FAIL(split_on(tmp_param_str, ',', tmp_param_strs))) {
+    LOG_WARN("fail to split func expr", K(ret), K(tmp_param_str));
+  } else if (tmp_param_strs.count() < 2) {  // at least two params(distance, type) should be set
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected vector index param count", K(ret), K(tmp_param_strs.count()));
+  } else {
+    const int64_t default_m_value = 16;
+    const int64_t default_ef_construction_value = 200;
+    const int64_t default_ef_search_value = 64;
+    const ObVectorIndexAlgorithmLib default_lib = ObVectorIndexAlgorithmLib::VIAL_VSAG;
+
+    for (int64_t i = 0; OB_SUCC(ret) && i < tmp_param_strs.count(); ++i) {
+      ObString one_tmp_param_str = tmp_param_strs.at(i).trim();
+      ObArray one_tmp_param_strs;
+      if (OB_FAIL(split_on(one_tmp_param_str, '=', one_tmp_param_strs))) {
+        LOG_WARN("fail to split one param str", K(ret), K(one_tmp_param_str));
+      } else if (one_tmp_param_strs.count() != 2) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected vector index one param pair count", K(ret), K(one_tmp_param_strs.count()));
+      } else {
+        ObString new_param_name = one_tmp_param_strs.at(0).trim();
+        ObString new_param_value = one_tmp_param_strs.at(1).trim();
+
+        if (new_param_name == "DISTANCE") {
+          if (new_param_value == "INNER_PRODUCT") {
+            param.dist_algorithm_ = ObVectorIndexDistAlgorithm::VIDA_IP;
+          } else if (new_param_value == "L2") {
+            param.dist_algorithm_ = ObVectorIndexDistAlgorithm::VIDA_L2;
+          } else {
+            ret = OB_NOT_SUPPORTED;
+            LOG_WARN("not support vector index dist algorithm", K(ret), K(new_param_value));
+          }
+        } else if (new_param_name == "LIB") {
+          if (new_param_value == "VSAG") {
+            param.lib_ = ObVectorIndexAlgorithmLib::VIAL_VSAG;
+          } else {
+            ret = OB_NOT_SUPPORTED;
+            LOG_WARN("not support vector index lib", K(ret), K(new_param_value));
+          }
+        } else if (new_param_name == "TYPE") {
+          if (new_param_value == "HNSW") {
+            param.type_ = ObVectorIndexAlgorithmType::VIAT_HNSW;
+          } else {
+            ret = OB_NOT_SUPPORTED;
+            LOG_WARN("not support vector index type", K(ret), K(new_param_value));
+          }
+        } else if (new_param_name == "M") {
+          int64_t int_value = 0;
+          if (OB_FAIL(ObSchemaUtils::str_to_int(new_param_value, int_value))) {
+            LOG_WARN("fail to str_to_int", K(ret), K(new_param_value));
+          } else if (int_value >= 5 && int_value <= 64) {
+            param.m_ = int_value;
+          } else {
+            ret = OB_NOT_SUPPORTED;
+            LOG_WARN("not support vector index m value", K(ret), K(int_value), K(new_param_value));
+          }
+        } else if (new_param_name == "EF_CONSTRUCTION") {
+          int64_t int_value = 0;
+          if (OB_FAIL(ObSchemaUtils::str_to_int(new_param_value, int_value))) {
+            LOG_WARN("fail to str_to_int", K(ret), K(new_param_value));
+          } else if (int_value >= 5 && int_value <= 1000) {
+            param.ef_construction_ = int_value;
+          } else {
+            ret = OB_NOT_SUPPORTED;
+            LOG_WARN("not support vector index ef_construction value", K(ret), K(int_value), K(new_param_value));
+          }
+        } else if (new_param_name == "EF_SEARCH") {
+          int64_t int_value = 0;
+          if (OB_FAIL(ObSchemaUtils::str_to_int(new_param_value, int_value))) {
+            LOG_WARN("fail to str_to_int", K(ret), K(new_param_value));
+          } else if (int_value >= 1 && int_value <= 1000) {
+            param.ef_search_ = int_value;
+          } else {
+            ret = OB_NOT_SUPPORTED;
+            LOG_WARN("not support vector index ef_search value", K(ret), K(int_value), K(new_param_value));
+          }
+        } else {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("unexpected vector index param name", K(ret), K(new_param_name));
+        }
+      }
+    }
+    if (OB_SUCC(ret)) {  // if vector param not set, set default
+      if (param.m_ == 0) { param.m_ = default_m_value; }
+      if (param.ef_construction_ == 0) { param.ef_construction_ = default_ef_construction_value; }
+      if (param.ef_search_ == 0) { param.ef_search_ = default_ef_search_value; }
+      if (param.lib_ == ObVectorIndexAlgorithmLib::VIAL_MAX) { param.lib_ = default_lib; }
+      param.dim_ = 0; // TODO@xiajin: fill dim
+    }
+    LOG_DEBUG("parser vector index param", K(ret), K(index_param_str), K(param));
+  }
+  return ret;
+}
+
+// Strips the aux-table suffix from the name of a non-shared vector index table. The delta
+// buffer table has an empty suffix, so its whole name is the prefix. On success prefix is set
+// with assign_ptr to the leading part of the schema's table name.
+// Returns OB_ERR_UNEXPECTED for non-vector tables, for the shared rowkey_vid / vid_rowkey
+// tables, and when the table name is shorter than the suffix it should end with.
+int ObVectorIndexUtil::get_index_name_prefix(
+  const schema::ObTableSchema &index_schema,
+  ObString &prefix)
+{
+  int ret = OB_SUCCESS;
+  if (!index_schema.is_vec_index()) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected, not vector index table", K(ret), K(index_schema));
+  } else if (index_schema.is_vec_rowkey_vid_type() || index_schema.is_vec_vid_rowkey_type()) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected vector index type, only support get none share table prefix",
+             K(ret), K(index_schema));
+  } else {
+    ObString tmp_table_name = index_schema.get_table_name();
+    const int64_t table_name_len = tmp_table_name.length();
+    const char* delta_buffer_table = "";
+    const char* index_id_table = "_index_id_table";
+    const char* index_snapshot_data_table = "_index_snapshot_data_table";
+    int64_t assign_len = 0;
+
+    // strlen() is unsigned; cast before subtracting so a too-short name yields a negative
+    // length that the check below can reject.
+    if (index_schema.is_vec_delta_buffer_type()) {
+      assign_len = table_name_len - static_cast<int64_t>(strlen(delta_buffer_table));
+    } else if (index_schema.is_vec_index_id_type()) {
+      assign_len = table_name_len - static_cast<int64_t>(strlen(index_id_table));
+    } else if (index_schema.is_vec_index_snapshot_data_type()) {
+      assign_len = table_name_len - static_cast<int64_t>(strlen(index_snapshot_data_table));
+    } else {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected vector index type", K(ret), K(index_schema));
+    }
+    if (OB_SUCC(ret) && assign_len < 0) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("table name is shorter than vector index suffix", K(ret), K(assign_len), K(tmp_table_name));
+    }
+    if (OB_SUCC(ret)) {
+      prefix.assign_ptr(tmp_table_name.ptr(), assign_len);
+    }
+  }
+  return ret;
+}
+
+// Reports through is_column_has_vector_index whether data-table column col_id is covered by a
+// user-created (not built-in) vector index of data_table_schema.
+int ObVectorIndexUtil::check_column_has_vector_index(
+    const ObTableSchema &data_table_schema, ObSchemaGetterGuard
&schema_guard, const int64_t col_id, bool &is_column_has_vector_index)
+{
+  // Walks every index of the data table, skips non-vector and built-in vector index tables,
+  // and for each remaining index checks whether any non-vid rowkey column is a generated
+  // column cascaded from col_id. The scan stops at the first match.
+  int ret = OB_SUCCESS;
+
+  // NOTE(review): the declaration below (and the bare ObArray / ObIArray uses further down)
+  // lost its template arguments in this copy of the patch; restore them from the upstream source.
+  ObSEArraysimple_index_infos;
+  const int64_t tenant_id = data_table_schema.get_tenant_id();
+  is_column_has_vector_index = false;
+
+  if (OB_FAIL(data_table_schema.get_simple_index_infos(simple_index_infos))) {
+    LOG_WARN("fail to get simple index infos failed", K(ret));
+  } else {
+    for (int64_t i = 0; OB_SUCC(ret) && i < simple_index_infos.count(); ++i) {
+      const ObTableSchema *index_table_schema = nullptr;
+      if (OB_FAIL(schema_guard.get_table_schema(tenant_id, simple_index_infos.at(i).table_id_, index_table_schema))) {
+        LOG_WARN("fail to get index_table_schema", K(ret), K(tenant_id), "table_id", simple_index_infos.at(i).table_id_);
+      } else if (OB_ISNULL(index_table_schema)) {
+        ret = OB_TABLE_NOT_EXIST;
+        LOG_WARN("index table schema should not be null", K(ret), K(simple_index_infos.at(i).table_id_));
+      } else if (!index_table_schema->is_vec_index()) {
+        // skip none vector index
+      } else if (index_table_schema->is_built_in_vec_index()) {
+        // skip built in vector index table
+      } else {
+        // handle delta_buffer_table index table
+        const ObRowkeyInfo &rowkey_info = index_table_schema->get_rowkey_info();
+        for (int64_t j = 0; OB_SUCC(ret) && !is_column_has_vector_index && j < rowkey_info.get_size(); j++) {
+          const ObRowkeyColumn *rowkey_column = rowkey_info.get_column(j);
+          const ObColumnSchemaV2 *col_schema = nullptr;
+          if (OB_ISNULL(rowkey_column)) {
+            // get_column() hands back a pointer; do not dereference it unchecked
+            ret = OB_ERR_UNEXPECTED;
+            LOG_WARN("unexpected rowkey column, is nullptr", K(ret), K(j), KPC(index_table_schema));
+          } else if (OB_ISNULL(col_schema = index_table_schema->get_column_schema(rowkey_column->column_id_))) {
+            ret = OB_ERR_UNEXPECTED;
+            LOG_WARN("unexpected col_schema, is nullptr", K(ret), "column_id", rowkey_column->column_id_, KPC(index_table_schema));
+          } else if (col_schema->is_vec_vid_column()) {
+            // only need vec_type, here skip vec_vid column of delta_buffer_table rowkey column
+          } else {
+            // get generated column cascaded column id info
+            // (vector index table key, like `c1` in "create table xxx vector index idx(c1)")
+            ObArray cascaded_column_ids;
+            // get column_schema from data table using generate column id
+            const ObColumnSchemaV2 *table_column = data_table_schema.get_column_schema(col_schema->get_column_id());
+            if (OB_ISNULL(table_column)) {
+              ret = OB_ERR_UNEXPECTED;
+              LOG_WARN("unexpected table column", K(ret));
+            } else if (OB_FAIL(table_column->get_cascaded_column_ids(cascaded_column_ids))) {
+              LOG_WARN("failed to get cascaded column ids", K(ret));
+            } else {
+              for (int64_t k = 0; OB_SUCC(ret) && !is_column_has_vector_index && k < cascaded_column_ids.count(); ++k) {
+                const ObColumnSchemaV2 *cascaded_column = NULL;
+                if (OB_ISNULL(cascaded_column = data_table_schema.get_column_schema(cascaded_column_ids.at(k)))) {
+                  ret = OB_ERR_UNEXPECTED;
+                  LOG_WARN("unexpected cascaded column", K(ret));
+                } else if (cascaded_column->get_column_id() == col_id) {
+                  is_column_has_vector_index = true;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+// Returns true when col_id is already present in vec_index_cols.
+bool ObVectorIndexUtil::has_multi_index_on_same_column(ObIArray &vec_index_cols, const uint64_t col_id)
+{
+  bool has_same_column_index = false;
+  for (int64_t i = 0; !has_same_column_index && i < vec_index_cols.count(); ++i) {
+    if (vec_index_cols.at(i) == col_id) {
+      has_same_column_index = true;
+    }
+  }
+  return has_same_column_index;
+}
+
+/* need deep copy */
+// Trims and upper-cases new_add_param and writes a copy, allocated from allocator, into
+// current_index_param. Returns OB_ERR_UNEXPECTED when new_add_param is empty or
+// current_index_param already holds a value.
+int ObVectorIndexUtil::insert_index_param_str(
+  const ObString &new_add_param, ObIAllocator &allocator, ObString &current_index_param)
+{
+  int ret = OB_SUCCESS;
+  ObString tmp_str = new_add_param;
+  ObString tmp_new_str;
+
+  if (new_add_param.empty() || !current_index_param.empty()) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected vector index string",
+             K(ret), K(new_add_param), K(current_index_param));
+  } else if (OB_FAIL(ob_simple_low_to_up(allocator, tmp_str.trim(), tmp_new_str))) {
+    LOG_WARN("string low to up failed", K(ret), K(tmp_str));
+  } else if (OB_FAIL(ob_write_string(allocator, tmp_new_str, current_index_param))){
+    LOG_WARN("fail to write vector index param", K(ret), K(tmp_new_str));
+  }
+
+  return ret;
+}
+
+int ObVectorIndexUtil::get_vector_index_column_id(
+    const ObTableSchema &data_table_schema, const ObTableSchema &index_table_schema, ObIArray &col_ids)
+{
+  INIT_SUCC(ret);
+  col_ids.reset();
+  if (!index_table_schema.is_vec_index()) {
+    // skip none vector index
+  } else if (index_table_schema.is_vec_rowkey_vid_type() || index_table_schema.is_vec_vid_rowkey_type()) {
+    // skip rowkey_vid and vid_rowkey table
+  } else {
+    for (int64_t i = 0; OB_SUCC(ret) && i < index_table_schema.get_column_count(); i++) {
+      const ObColumnSchemaV2 *col_schema = nullptr;
+      if (OB_ISNULL(col_schema = index_table_schema.get_column_schema_by_idx(i))) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected col_schema, is nullptr", K(ret), K(i), K(index_table_schema));
+      } else if (!col_schema->is_vec_vector_column()) {
+        // only need vec_vector column, here skip other column
+      } else {
+        // get generated column cascaded column id info
+        // (vector index table key, like `c1` in "create table xxx vector index idx(c1)")
+        ObArray cascaded_column_ids;
+        // get column_schema from data table using generate column id
+        const ObColumnSchemaV2 *ori_col_schema = data_table_schema.get_column_schema(col_schema->get_column_id());
+        if (OB_ISNULL(ori_col_schema)) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("unexpected ori column", K(ret), K(col_schema->get_column_id()), K(data_table_schema));
+        } else if (OB_FAIL(ori_col_schema->get_cascaded_column_ids(cascaded_column_ids))) {
+          LOG_WARN("failed to get cascaded column ids", K(ret));
+        } else {
+          for (int64_t j = 0; OB_SUCC(ret) && j < cascaded_column_ids.count(); ++j) {
+            const ObColumnSchemaV2 *cascaded_column = NULL;
+            uint64_t new_col_id;
+            if (OB_ISNULL(cascaded_column = data_table_schema.get_column_schema(cascaded_column_ids.at(j)))) {
+              ret = OB_ERR_UNEXPECTED;
+              LOG_WARN("unexpected cascaded column", K(ret));
+            } else if (OB_FALSE_IT(new_col_id = cascaded_column->get_column_id())) {
+            }
else if (OB_FAIL(col_ids.push_back(new_col_id))) { + LOG_WARN("fail to push back col names", K(ret), K(new_col_id)); + } else { + LOG_DEBUG("success to get vector index col name", K(ret), K(new_col_id)); + } + } + } + } + } + } + return ret; +} + +/* + 只支持从3号表得到column name +*/ +int ObVectorIndexUtil::get_vector_index_column_name( + const ObTableSchema &data_table_schema, const ObTableSchema &index_table_schema, ObIArray &col_names) +{ + int ret = OB_SUCCESS; + col_names.reset(); + if (!index_table_schema.is_vec_index()) { + // skip none vector index + } else if (index_table_schema.is_built_in_vec_index()) { + // skip built in vector index table + } else { + // handle delta_buffer_table index table + const ObRowkeyInfo &rowkey_info = index_table_schema.get_rowkey_info(); + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_info.get_size(); i++) { + const ObRowkeyColumn *rowkey_column = rowkey_info.get_column(i); + const int64_t column_id = rowkey_column->column_id_; + const ObColumnSchemaV2 *col_schema = nullptr; + if (OB_ISNULL(col_schema = index_table_schema.get_column_schema(column_id))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected col_schema, is nullptr", K(ret), K(column_id), K(index_table_schema)); + } else if (col_schema->is_vec_vid_column()) { + // only need vec_type, here skip vec_vid column of delta_buffer_table rowkey column + } else { + // get generated column cascaded column id info + // (vector index table key, like `c1` in "create table xxx vector index idx(c1)") + ObArray cascaded_column_ids; + // get column_schema from data table using generate column id + const ObColumnSchemaV2 *table_column = data_table_schema.get_column_schema(col_schema->get_column_id()); + if (OB_ISNULL(table_column)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected table column", K(ret)); + } else if (OB_FAIL(table_column->get_cascaded_column_ids(cascaded_column_ids))) { + LOG_WARN("failed to get cascaded column ids", K(ret)); + } else { + for (int64_t j = 0; 
OB_SUCC(ret) && j < cascaded_column_ids.count(); ++j) { + const ObColumnSchemaV2 *cascaded_column = NULL; + ObString new_col_name; + if (OB_ISNULL(cascaded_column = data_table_schema.get_column_schema(cascaded_column_ids.at(j)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected cascaded column", K(ret)); + } else if (OB_FALSE_IT(new_col_name = cascaded_column->get_column_name())) { + } else if (OB_FAIL(col_names.push_back(new_col_name))) { + LOG_WARN("fail to push back col names", K(ret), K(new_col_name)); + } else { + LOG_DEBUG("success to get vector index col name", K(ret), K(new_col_name)); + } + } + } + } + } + } + return ret; +} + +/* + 目前只支持单列向量索引。 + */ +int ObVectorIndexUtil::get_vector_dim_from_extend_type_info(const ObIArray &extend_type_info, int64_t &dim) +{ + int ret = OB_SUCCESS; + dim = 0; + if (extend_type_info.count() != 1) { + // Vector index columns currently only support single column vector indexes. + // When building the vector column of the auxiliary table, only one column of extend_type_info is assigned. 
+ ret = OB_NOT_SUPPORTED; + LOG_WARN("unexpected extend type info, current only support one column vector index", + K(ret), K(extend_type_info)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "vector column index only support build on one vector column"); + } else { + ObString extend_type_info_str = extend_type_info.at(0); + ObString spilt_str = extend_type_info_str.split_on('(').trim(); + if (0 == spilt_str.compare("ARRAY")) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("unexpected column type", K(ret), K(spilt_str)); + } else if (0 != spilt_str.compare("VECTOR")) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected column extend info type", K(ret), K(spilt_str)); + } else if (OB_FALSE_IT(spilt_str = extend_type_info_str.split_on(')').trim())) { + } else { + dim = std::atoi(spilt_str.ptr()); + } + } + return ret; +} + +/* + To obtain the dimension of the vector index。 + it is currently only supported to retrieve it from table 345, as only table 345 contains vector column information. +*/ +int ObVectorIndexUtil::get_vector_index_column_dim(const ObTableSchema &index_table_schema, int64_t &dim) +{ + int ret = OB_SUCCESS; + ObSArray all_column_ids; + dim = 0; + if (!index_table_schema.is_vec_index()) { + // skip none vector index + } else if (!index_table_schema.is_vec_delta_buffer_type() && + !index_table_schema.is_vec_index_id_type() && + !index_table_schema.is_vec_index_snapshot_data_type()) { + // skip has no vector column index table + } else if (OB_FAIL(index_table_schema.get_column_ids(all_column_ids))) { + LOG_WARN("fail to get all column ids", K(ret), K(index_table_schema)); + } else { + // handle delta_buffer_table index table + for (int64_t i = 0; OB_SUCC(ret) && i < all_column_ids.count(); i++) { + const int64_t column_id = all_column_ids.at(i); + const ObColumnSchemaV2 *col_schema = nullptr; + ObArray extend_type_info; + if (OB_ISNULL(col_schema = index_table_schema.get_column_schema(column_id))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected col_schema, is nullptr", 
K(ret), K(column_id), K(index_table_schema)); + } else if (!col_schema->is_vec_vector_column()) { + // only need vec_type, here skip vec_vid column of delta_buffer_table rowkey column + } else if (OB_FAIL(get_vector_dim_from_extend_type_info(col_schema->get_extended_type_info(), + dim))) { + LOG_WARN("fail to get vector dim", K(ret)); + } + } + } + return ret; +} + +int ObVectorIndexUtil::get_vector_index_tid( + share::schema::ObSchemaGetterGuard *schema_guard, + const ObTableSchema &data_table_schema, + const ObIndexType index_type, + const int64_t col_id, + uint64_t &tid) +{ + int ret = OB_SUCCESS; + + ObSEArraysimple_index_infos; + const int64_t tenant_id = data_table_schema.get_tenant_id(); + tid = OB_INVALID_ID; + + if (!share::schema::is_vec_index(index_type)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid index type for vector index", K(index_type)); + } else if (OB_FAIL(data_table_schema.get_simple_index_infos(simple_index_infos))) { + LOG_WARN("fail to get simple index infos failed", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < simple_index_infos.count() && tid == OB_INVALID_ID; ++i) { + const ObTableSchema *index_table_schema = nullptr; + if (OB_FAIL(schema_guard->get_table_schema(tenant_id, simple_index_infos.at(i).table_id_, index_table_schema))) { + LOG_WARN("fail to get index_table_schema", K(ret), K(tenant_id), "table_id", simple_index_infos.at(i).table_id_); + } else if (OB_ISNULL(index_table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("index table schema should not be null", K(ret), K(simple_index_infos.at(i).table_id_)); + } else if (!index_table_schema->is_vec_index()) { + // skip none vector index + } else if (index_table_schema->get_index_type() != index_type) { + // skip not spec index type + } else if (index_table_schema->is_vec_rowkey_vid_type() || index_table_schema->is_vec_vid_rowkey_type()) { + // rowkey_vid and vid_rowkey is shared, only one, just return + tid = simple_index_infos.at(i).table_id_; + } else { // 
delta buffer, index id, index snapshot, we should check cascaded_column by vec_vector col + for (int64_t j = 0; OB_SUCC(ret) && tid == OB_INVALID_ID && j < index_table_schema->get_column_count(); j++) { + const ObColumnSchemaV2 *col_schema = nullptr; + if (OB_ISNULL(col_schema = index_table_schema->get_column_schema_by_idx(j))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected col_schema, is nullptr", K(ret), K(j), KPC(index_table_schema)); + } else if (!col_schema->is_vec_vector_column()) { + // only need vec_vector column, here skip other column + } else { + // get generated column cascaded column id info + // (vector index table key, like `c1` in "create table xxx vector index idx(c1)") + ObArray cascaded_column_ids; + // get column_schema from data table using generate column id + const ObColumnSchemaV2 *ori_col_schema = data_table_schema.get_column_schema(col_schema->get_column_id()); + if (OB_ISNULL(ori_col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected table column", K(ret), K(col_schema->get_column_id()), K(data_table_schema)); + } else if (OB_FAIL(ori_col_schema->get_cascaded_column_ids(cascaded_column_ids))) { + LOG_WARN("failed to get cascaded column ids", K(ret)); + } else { + for (int64_t k = 0; OB_SUCC(ret) && tid == OB_INVALID_ID && k < cascaded_column_ids.count(); ++k) { + const ObColumnSchemaV2 *cascaded_column = NULL; + ObString new_col_name; + if (OB_ISNULL(cascaded_column = data_table_schema.get_column_schema(cascaded_column_ids.at(k)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected cascaded column", K(ret)); + } else if (cascaded_column->get_column_id() == col_id) { + tid = simple_index_infos.at(i).table_id_; + } + } + } + } + } + } + } + } + return ret; +} + +void ObVecIdxSnapshotDataWriteCtx::reset() +{ + ls_id_.reset(); + data_tablet_id_.reset(); + lob_meta_tablet_id_.reset(); + lob_piece_tablet_id_.reset(); + vals_.reset(); +} + +int ObVectorIndexUtil::generate_new_index_name(ObIAllocator &allocator, ObString 
&new_index_name) +{ + int ret = OB_SUCCESS; + char *buf = static_cast(allocator.alloc(OB_MAX_TABLE_NAME_LENGTH)); + int64_t pos = 0; + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc new memory", K(ret)); + } else if (OB_FAIL(databuff_printf(buf, + OB_MAX_TABLE_NAME_LENGTH, + pos, + "idx_%lu", ObTimeUtility::current_time()))){ + LOG_WARN("fail to printf current time", K(ret)); + } else { + new_index_name.assign_ptr(buf, static_cast(pos)); + } + return ret; +} + +int ObVectorIndexUtil::generate_switch_index_names( + const ObString &old_domain_index_name, + const ObString &new_domain_index_name, + ObIAllocator &allocator, + ObIArray &old_table_names, + ObIArray &new_table_names) +{ + int ret = OB_SUCCESS; + ObString old_delta_buffer_table_name = old_domain_index_name; + ObString new_delta_buffer_table_name = new_domain_index_name; + ObString new_index_id_table_name; + ObString new_snapshot_data_table_name; + ObString old_index_id_table_name; + ObString old_snapshot_data_table_name; + + if (OB_FAIL(new_table_names.push_back(new_delta_buffer_table_name))) { + LOG_WARN("fail to push back new delta buffer table name", K(ret)); + } else if (OB_FAIL(old_table_names.push_back(old_delta_buffer_table_name))) { + LOG_WARN("fail to push back old delta buffer table name", K(ret)); + } else if (OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator, + INDEX_TYPE_VEC_INDEX_ID_LOCAL, + new_domain_index_name, + new_index_id_table_name))) { + LOG_WARN("fail to generate delta buffer table name", K(ret), K(new_domain_index_name)); + } else if (OB_FAIL(new_table_names.push_back(new_index_id_table_name))) { + LOG_WARN("fail to push back new index id table name", K(ret)); + } else if (OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator, + INDEX_TYPE_VEC_INDEX_ID_LOCAL, + old_domain_index_name, + old_index_id_table_name))) { + LOG_WARN("fail to generate index id table name", K(ret), K(old_domain_index_name)); + } else if 
(OB_FAIL(old_table_names.push_back(old_index_id_table_name))) { + LOG_WARN("fail to push back new snapshot data table name", K(ret)); + } else if (OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator, + INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL, + new_domain_index_name, + new_snapshot_data_table_name))) { + LOG_WARN("fail to construct old snapshot data table name", K(ret), K(new_domain_index_name)); + } else if (OB_FAIL(new_table_names.push_back(new_snapshot_data_table_name))) { + LOG_WARN("fail to push back old snapshot data table name", K(ret)); + } else if (OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator, + INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL, + old_domain_index_name, + old_snapshot_data_table_name))) { + LOG_WARN("fail to construct old snapshot data table name", K(ret), K(old_domain_index_name)); + } else if (OB_FAIL(old_table_names.push_back(old_snapshot_data_table_name))) { + LOG_WARN("fail to push back old snapshot data table name", K(ret)); + } + return ret; +} + +int ObVectorIndexUtil::update_index_tables_status( + const int64_t tenant_id, + const int64_t database_id, + const ObIArray &old_table_names, + const ObIArray &new_table_names, + rootserver::ObDDLOperator &ddl_operator, + ObSchemaGetterGuard &schema_guard, + common::ObMySQLTransaction &trans, + ObIArray &table_schemas) +{ + int ret = OB_SUCCESS; + const bool is_index = true; + if (OB_INVALID_TENANT_ID == tenant_id || OB_INVALID_ID == database_id || + old_table_names.count() <= 0 || new_table_names.count() <= 0 || + (old_table_names.count() != new_table_names.count())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", + K(ret), K(tenant_id), K(database_id), K(old_table_names), K(new_table_names)); + } else { + // update old index status + for (int64_t i = 0; OB_SUCC(ret) && i < old_table_names.count(); ++i) { + const ObString *ddl_stmt_str = NULL; + const ObTableSchema *index_schema = nullptr; + bool in_offline_ddl_white_list = false; + const bool 
is_built_in_index = i == 0 ? false : true; + const ObString &old_index_name = old_table_names.at(i); + const ObString &new_index_name = new_table_names.at(i); + SMART_VAR(ObTableSchema, tmp_schema) { + // ObTableSchema tmp_schema; + if (OB_FAIL(schema_guard.get_table_schema(tenant_id, + database_id, + old_index_name, + is_index, /* is_index */ + index_schema, + false, /* is_hidden_table */ + is_built_in_index))) { + LOG_WARN("fail to get table schema", K(ret), K(old_index_name)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), K(tenant_id), K(database_id), K(old_index_name)); + } else if (!index_schema->is_vec_index()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected, here should be vector index schema", K(ret), K(index_schema)); + } else if (index_schema->is_unavailable_index()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("switch name of unaveliable index is not support", KR(ret)); + } else if (OB_FALSE_IT(in_offline_ddl_white_list = index_schema->get_table_state_flag() != TABLE_STATE_NORMAL)) { + } else if (OB_FAIL(ddl_operator.update_index_status(tenant_id, + index_schema->get_data_table_id(), + index_schema->get_table_id(), + INDEX_STATUS_UNAVAILABLE, + in_offline_ddl_white_list, + trans, + ddl_stmt_str))) { + LOG_WARN("update_index_status failed", K(index_schema->get_data_table_id())); + } else if (OB_FAIL(tmp_schema.assign(*index_schema))) { + LOG_WARN("fail to assign schema", K(ret)); + } else if (OB_FALSE_IT(tmp_schema.set_index_status(INDEX_STATUS_UNAVAILABLE))) { + } else if (OB_FAIL(tmp_schema.set_table_name(new_index_name))) { + LOG_WARN("fail to set table name", K(ret), K(new_index_name)); + } else if (OB_FAIL(table_schemas.push_back(tmp_schema))) { + LOG_WARN("fail to push back schema", K(ret)); + } + } // end smart_var + } + } + return ret; +} + +int ObVectorIndexUtil::update_index_tables_attributes( + const int64_t tenant_id, + const int64_t database_id, + const int64_t data_table_id, + const 
int64_t expected_update_table_cnt, + const ObIArray &old_table_names, + const ObIArray &new_table_names, + rootserver::ObDDLOperator &ddl_operator, + ObSchemaGetterGuard &schema_guard, + common::ObMySQLTransaction &trans, + ObIArray &table_schemas) +{ + int ret = OB_SUCCESS; + const bool is_index = true; + if (OB_INVALID_TENANT_ID == tenant_id || OB_INVALID_ID == database_id || OB_INVALID_ID == data_table_id || + old_table_names.count() <= 0 || new_table_names.count() <= 0 || + (table_schemas.count() != old_table_names.count()) || + (old_table_names.count() != new_table_names.count())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", + K(ret), K(tenant_id), K(database_id), K(data_table_id), + K(table_schemas.count()), K(old_table_names.count()), K(new_table_names.count())); + } else { + // switch new/old index name + for (int64_t i = 0; OB_SUCC(ret) && i < new_table_names.count(); i++) { + const ObString *ddl_stmt_str = NULL; + const ObTableSchema *index_schema = nullptr; + const bool is_built_in_index = i == 0 ? 
false : true; + const ObString &new_index_name = new_table_names.at(i); + const ObString &old_index_name = old_table_names.at(i); + SMART_VAR(ObTableSchema, tmp_schema) { + // ObTableSchema tmp_schema; + if (OB_FAIL(schema_guard.get_table_schema(tenant_id, + database_id, + new_index_name, + is_index, + index_schema, + false, /* is_hidden */ + is_built_in_index))) { + LOG_WARN("fail to get table schema", K(ret), K(new_index_name)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), K(tenant_id), K(database_id), K(new_index_name)); + } else if (!index_schema->is_vec_index()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected, here should be vector index schema", K(ret), KPC(index_schema)); + } else if (index_schema->is_unavailable_index()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("switch name of unaveliable index is not support", KR(ret), KPC(index_schema)); + } else if (OB_FAIL(tmp_schema.assign(*index_schema))) { + LOG_WARN("fail to assign index schema", K(ret)); + } else if (OB_FAIL(tmp_schema.set_table_name(old_index_name))) { + LOG_WARN("fail to set new table name", K(ret), K(old_index_name)); + } else if (OB_FAIL(table_schemas.push_back(tmp_schema))) { + LOG_WARN("fail to push back schema", K(ret)); + } + } // end smart_var + } + if (OB_SUCC(ret)) { // get data table schema to update schema version + SMART_VAR(ObTableSchema, tmp_schema) { + const ObTableSchema *data_table_schema = nullptr; + if (OB_FAIL(schema_guard.get_table_schema(tenant_id, data_table_id, data_table_schema))) { + LOG_WARN("fail to get data table schema", K(ret), K(data_table_id)); + } else if (OB_ISNULL(data_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), KP(data_table_schema)); + } else if (OB_FAIL(tmp_schema.assign(*data_table_schema))) { + LOG_WARN("fail to assign table schema", K(ret)); + } else if (OB_FAIL(table_schemas.push_back(tmp_schema))) { + LOG_WARN("fail to push back table schema", 
K(ret)); + } + } + } + // update table attribute + if (OB_FAIL(ret)) { + } else if (table_schemas.count() != expected_update_table_cnt) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected update table schema count", K(table_schemas.count())); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < table_schemas.count(); ++i) { + ObSchemaOperationType operation_type = OB_DDL_ALTER_TABLE; + const ObString *ddl_stmt_str = NULL; + if (OB_FAIL(ddl_operator.update_table_attribute(table_schemas.at(i), + trans, + operation_type, + ddl_stmt_str))) { + LOG_WARN("failed to update index table schema attribute", K(ret), K(table_schemas.at(i))); + } + } + } + } + return ret; +} + + +int ObVectorIndexUtil::generate_index_schema_from_exist_table( + const int64_t tenant_id, + share::schema::ObSchemaGetterGuard &schema_guard, + rootserver::ObDDLService &ddl_service, + const obrpc::ObCreateIndexArg &create_index_arg, + const ObTableSchema &data_table_schema, + ObTableSchema &new_index_schema) +{ + int ret = OB_SUCCESS; + const ObTableSchema *old_index_schema = nullptr; + const ObTableSchema *old_domain_index_schema = nullptr; + const int64_t old_domain_table_id = create_index_arg.index_table_id_; + const ObString database_name = create_index_arg.database_name_; + const ObString new_index_name_suffix = create_index_arg.index_name_; // e.g: idx_xxx_delta_buffer_table, idx_xxx_index_id_table... + ObString old_domain_index_name; // The name of the old table number 3. + ObString old_index_table_name; // The name of the old index table, is composed of the number 3 table and a suffix, and it used to obtain the schema of the old index table. 
+ ObString new_index_table_name; // The name of the new index table + uint64_t new_index_table_id = OB_INVALID_ID; + ObArenaAllocator allocator(lib::ObLabel("DdlTaskTmp")); + ObSchemaService *schema_service = nullptr; + + if (OB_ISNULL(GCTX.schema_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("schema_service is null", K(ret)); + } else if (OB_ISNULL(schema_service = GCTX.schema_service_->get_schema_service())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("schema service is null", K(ret)); + } else if (tenant_id == OB_INVALID_TENANT_ID || old_domain_table_id == OB_INVALID_ID || + new_index_name_suffix.empty() || database_name.empty()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), + K(tenant_id), K(old_domain_table_id), K(new_index_name_suffix), K(database_name), KP(schema_service)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, old_domain_table_id, old_domain_index_schema))) { + LOG_WARN("fail to get old domain index schema", K(ret), K(tenant_id), K(old_domain_table_id)); + } else if (OB_ISNULL(old_domain_index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret)); + } else if (OB_FALSE_IT(old_domain_index_name = old_domain_index_schema->get_table_name())) { + } else if (OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(&allocator, + create_index_arg.index_type_, + old_domain_index_name, + old_index_table_name))) { + LOG_WARN("failed to generate index name", K(ret)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, + database_name, + old_index_table_name, + true, /* is_index */ + old_index_schema, + false, /* with_hidden_flag */ + share::schema::is_built_in_vec_index(create_index_arg.index_type_)))) { + LOG_WARN("fail to get origin index schema", K(ret), K(tenant_id), K(old_domain_index_name), K(old_index_table_name)); + } else if (OB_ISNULL(old_index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), K(old_index_table_name)); + } else if 
(OB_FAIL(new_index_schema.assign(*old_index_schema))) { + LOG_WARN("fail to assign schema", K(ret)); + } else if (OB_FAIL(ObTableSchema::build_index_table_name(allocator, + data_table_schema.get_table_id(), + new_index_name_suffix, + new_index_table_name))) { + LOG_WARN("fail to build index table name", K(ret), K(create_index_arg.index_name_)); + } else { + if (FALSE_IT(new_index_schema.set_tenant_id(tenant_id))) { + } else if (OB_FAIL(new_index_schema.set_table_name(new_index_table_name))) { + LOG_WARN("set table name failed", K(ret), K(new_index_table_name)); + } else if (OB_FAIL(schema_service->fetch_new_table_id(tenant_id, new_index_table_id))) { + LOG_WARN("failed to fetch_new_table_id", K(ret)); + } else if (OB_FAIL(ddl_service.generate_object_id_for_partition_schema(new_index_schema))) { + LOG_WARN("fail to generate object_id for partition schema", KR(ret), K(new_index_schema)); + } else if (OB_FAIL(ddl_service.generate_tablet_id(new_index_schema))) { + LOG_WARN("fail to generate tablet id for hidden table", K(ret), K(new_index_schema)); + } else { + new_index_schema.set_max_used_column_id(max( + new_index_schema.get_max_used_column_id(), data_table_schema.get_max_used_column_id())); + new_index_schema.set_table_id(new_index_table_id); + new_index_schema.set_index_status(INDEX_STATUS_UNAVAILABLE); + new_index_schema.set_table_state_flag(data_table_schema.get_table_state_flag()); + } + } + LOG_DEBUG("generate_index_schema_from_exist_table", K(ret), K(new_index_table_name)); + return ret; +} + +} +} diff --git a/src/share/vector_index/ob_vector_index_util.h b/src/share/vector_index/ob_vector_index_util.h new file mode 100644 index 0000000000..9bd594b739 --- /dev/null +++ b/src/share/vector_index/ob_vector_index_util.h @@ -0,0 +1,139 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + + +#ifndef OCEANBASE_SHARE_VECTOR_INDEX_UTIL_H_ +#define OCEANBASE_SHARE_VECTOR_INDEX_UTIL_H_ + +#include "lib/string/ob_string.h" +#include "lib/container/ob_array.h" +#include "share/vector_index/ob_plugin_vector_index_adaptor.h" +#include "share/schema/ob_table_schema.h" +#include "rootserver/ob_ddl_operator.h" +#include "rootserver/ob_ddl_service.h" + +namespace oceanbase +{ +namespace share +{ + +class ObVectorIndexUtil final +{ +public: + static int parser_params_from_string( + const ObString &origin_string, + ObVectorIndexHNSWParam ¶m); + static int insert_index_param_str( + const ObString &new_add_param, + ObIAllocator &allocator, + ObString ¤t_index_param); + static int get_index_name_prefix( + const schema::ObTableSchema &index_schema, + ObString &prefix); + static int check_column_has_vector_index( + const ObTableSchema &data_table_schema, + ObSchemaGetterGuard &schema_guard, + const int64_t col_id, + bool &is_column_has_vector_index); + static int get_vector_index_column_name( + const ObTableSchema &data_table_schema, + const ObTableSchema &index_table_schema, + ObIArray &col_names); + static int get_vector_index_column_id( + const ObTableSchema &data_table_schema, + const ObTableSchema &index_table_schema, + ObIArray &col_ids); + static int get_vector_index_column_dim( + const ObTableSchema &index_table_schema, + int64_t &dim); + static int get_vector_index_tid( + share::schema::ObSchemaGetterGuard *schema_guard, + const ObTableSchema &data_table_schema, + const ObIndexType index_type, + const int64_t col_id, + uint64_t &tid); + static int get_vector_dim_from_extend_type_info( + const ObIArray 
&extend_type_info, + int64_t &dim); + static int generate_new_index_name( + ObIAllocator &allocator, + ObString &new_index_name); + static int generate_switch_index_names( + const ObString &old_domain_index_name, + const ObString &new_domain_index_name, + ObIAllocator &allocator, + ObIArray &old_table_names, + ObIArray &new_table_names); + static int update_index_tables_status( + const int64_t tenant_id, + const int64_t database_id, + const ObIArray &old_table_names, + const ObIArray &new_table_names, + rootserver::ObDDLOperator &ddl_operator, + ObSchemaGetterGuard &schema_guard, + common::ObMySQLTransaction &trans, + ObIArray &table_schemas); + static int update_index_tables_attributes( + const int64_t tenant_id, + const int64_t database_id, + const int64_t data_table_id, + const int64_t expected_update_table_cnt, + const ObIArray &old_table_names, + const ObIArray &new_table_names, + rootserver::ObDDLOperator &ddl_operator, + ObSchemaGetterGuard &schema_guard, + common::ObMySQLTransaction &trans, + ObIArray &table_schemas); + static int generate_index_schema_from_exist_table( + const int64_t tenant_id, + share::schema::ObSchemaGetterGuard &schema_guard, + rootserver::ObDDLService &ddl_service, + const obrpc::ObCreateIndexArg &create_index_arg, + const ObTableSchema &data_table_schema, + ObTableSchema &new_index_schema); + static bool has_multi_index_on_same_column( + ObIArray &vec_index_cols, + const uint64_t col_id); +}; + +// For vector index snapshot write data +class ObVecIdxSnapshotDataWriteCtx final +{ +public: + ObVecIdxSnapshotDataWriteCtx() + : ls_id_(), data_tablet_id_(), lob_meta_tablet_id_(), lob_piece_tablet_id_(), + vals_() + {} + ~ObVecIdxSnapshotDataWriteCtx() {} + ObLSID& get_ls_id() { return ls_id_; } + const ObLSID& get_ls_id() const { return ls_id_; } + ObTabletID& get_data_tablet_id() { return data_tablet_id_; } + const ObTabletID& get_data_tablet_id() const { return data_tablet_id_; } + ObTabletID& get_lob_meta_tablet_id() { return 
lob_meta_tablet_id_; } + const ObTabletID& get_lob_meta_tablet_id() const { return lob_meta_tablet_id_; } + ObTabletID& get_lob_piece_tablet_id() { return lob_piece_tablet_id_; } + const ObTabletID& get_lob_piece_tablet_id() const { return lob_piece_tablet_id_; } + ObIArray& get_vals() { return vals_; } + void reset(); + TO_STRING_KV(K(ls_id_), K(data_tablet_id_), K(lob_meta_tablet_id_), K(lob_piece_tablet_id_), K(vals_)); +public: + ObLSID ls_id_; + ObTabletID data_tablet_id_; + ObTabletID lob_meta_tablet_id_; + ObTabletID lob_piece_tablet_id_; + ObArray vals_; +}; + +} +} + +#endif \ No newline at end of file diff --git a/src/share/vector_type/ob_vector_cosine_distance.cpp b/src/share/vector_type/ob_vector_cosine_distance.cpp new file mode 100644 index 0000000000..113c6b6d59 --- /dev/null +++ b/src/share/vector_type/ob_vector_cosine_distance.cpp @@ -0,0 +1,78 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+#include "ob_vector_cosine_distance.h"
+namespace oceanbase
+{
+namespace common
+{
+int ObVectorCosineDistance::cosine_similarity_func(const float *a, const float *b, const int64_t len, double &similarity)
+{
+  return cosine_similarity_normal(a, b, len, similarity); // public entry point; currently always the scalar ("normal") path
+}
+
+int ObVectorCosineDistance::cosine_distance_func(const float *a, const float *b, const int64_t len, double &distance) {
+  int ret = OB_SUCCESS;
+  double similarity = 0;
+  if (OB_FAIL(cosine_similarity_func(a, b, len, similarity))) {
+    if (OB_ERR_NULL_VALUE != ret) { // OB_ERR_NULL_VALUE (a zero-norm input) is passed up without a warning
+      LIB_LOG(WARN, "failed to cal cosine similaity", K(ret));
+    }
+  } else { // distance is written only on success; it is left untouched on every error path
+    distance = get_cosine_distance(similarity);
+  }
+  return ret;
+}
+
+OB_INLINE double ObVectorCosineDistance::get_cosine_distance(double similarity)
+{
+  if (similarity > 1.0) { // clamp to [-1, 1] so the value returned below stays within [0, 2]
+    similarity = 1.0;
+  } else if (similarity < -1.0) {
+    similarity = -1.0;
+  }
+  return 1.0 - similarity; // cosine distance = 1 - cosine similarity
+}
+
+OB_INLINE int ObVectorCosineDistance::cosine_calculate_normal(const float *a, const float *b, const int64_t len, double &ip, double &abs_dist_a, double &abs_dist_b)
+{
+  int ret = OB_SUCCESS;
+  for (int64_t i = 0; OB_SUCC(ret) && i < len; ++i) {
+    ip += a[i] * b[i]; // ip / abs_dist_a / abs_dist_b are added to, never reset here; cosine_similarity_normal passes zeros
+    abs_dist_a += a[i] * a[i];
+    abs_dist_b += b[i] * b[i];
+    if (OB_UNLIKELY(0 != ::isinf(ip) || 0 != ::isinf(abs_dist_a) || 0 != ::isinf(abs_dist_b))) { // any running sum became infinite
+      ret = OB_NUMERIC_OVERFLOW;
+      LIB_LOG(WARN, "value is overflow", K(ret), K(ip), K(abs_dist_a), K(abs_dist_b));
+    }
+  }
+  return ret;
+}
+
+OB_INLINE int ObVectorCosineDistance::cosine_similarity_normal(const float *a, const float *b, const int64_t len, double &similarity)
+{
+  int ret = OB_SUCCESS;
+  double ip = 0;
+  double abs_dist_a = 0;
+  double abs_dist_b = 0;
+  similarity = 0;
+  if (OB_FAIL(cosine_calculate_normal(a, b, len, ip, abs_dist_a, abs_dist_b))) {
+    LIB_LOG(WARN, "failed to cal cosine", K(ret), K(ip));
+  } else if (0 == abs_dist_a || 0 == abs_dist_b) { // a zero squared norm would make the division below undefined
+    ret = OB_ERR_NULL_VALUE;
+  } else {
+    similarity = ip / (sqrt(abs_dist_a * abs_dist_b)); // dot(a, b) / (|a| * |b|)
+  }
+  return ret;
+}
+}
+}
\ No newline at end of file
diff --git a/src/share/vector_type/ob_vector_cosine_distance.h b/src/share/vector_type/ob_vector_cosine_distance.h
new file mode 100644
index 0000000000..ae5f2ccad8
--- /dev/null
+++ b/src/share/vector_type/ob_vector_cosine_distance.h
@@ -0,0 +1,38 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_LIB_OB_VECTOR_COSINE_DISTANCE_H_
+#define OCEANBASE_LIB_OB_VECTOR_COSINE_DISTANCE_H_
+
+#include "lib/utility/ob_print_utils.h"
+#include "lib/oblog/ob_log.h"
+#include "lib/ob_define.h"
+#include "common/object/ob_obj_compare.h"
+
+namespace oceanbase
+{
+namespace common
+{
+struct ObVectorCosineDistance
+{
+  static int cosine_similarity_func(const float *a, const float *b, const int64_t len, double &similarity); // dot(a, b) / (|a| * |b|); OB_ERR_NULL_VALUE when either squared norm is 0
+  static int cosine_distance_func(const float *a, const float *b, const int64_t len, double &distance); // 1 - similarity, with the similarity clamped to [-1, 1]
+
+  // scalar ("normal") implementations backing the *_func entry points above
+  OB_INLINE static int cosine_similarity_normal(const float *a, const float *b, const int64_t len, double &similarity);
+  OB_INLINE static int cosine_calculate_normal(const float *a, const float *b, const int64_t len, double &ip, double &abs_dist_a, double &abs_dist_b); // adds dot(a, b), |a|^2 and |b|^2 onto the three outputs
+  OB_INLINE static double get_cosine_distance(double similarity);
+  // TODO(@jingshui) add simd func
+};
+} // common
+} // oceanbase
+#endif
\ No newline at end of file
diff --git a/src/share/vector_type/ob_vector_ip_distance.cpp b/src/share/vector_type/ob_vector_ip_distance.cpp
new file mode 100644
index
0000000000..2c46ced824
--- /dev/null
+++ b/src/share/vector_type/ob_vector_ip_distance.cpp
@@ -0,0 +1,46 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#include "ob_vector_ip_distance.h"
+namespace oceanbase
+{
+namespace common
+{
+// Inner (dot) product of a and b over len elements, stored to distance on success.
+// Currently always delegates to the scalar implementation below.
+int ObVectorIpDistance::ip_distance_func(const float *a, const float *b, const int64_t len, double &distance)
+{
+  return ip_distance_normal(a, b, len, distance);
+}
+
+// Scalar inner product. The sum is built in a local accumulator and stored to distance only on
+// success, so the result does not depend on the value distance holds on entry and a partial sum
+// is never left behind on failure. Returns OB_NUMERIC_OVERFLOW as soon as the running sum becomes
+// infinite.
+OB_INLINE int ObVectorIpDistance::ip_distance_normal(const float *a, const float *b, const int64_t len, double &distance)
+{
+  int ret = OB_SUCCESS;
+  double sum = 0;
+  for (int64_t i = 0; OB_SUCC(ret) && i < len; ++i) {
+    sum += a[i] * b[i];
+    if (OB_UNLIKELY(0 != ::isinf(sum))) {
+      ret = OB_NUMERIC_OVERFLOW;
+      LIB_LOG(WARN, "value is overflow", K(ret), K(sum));
+    }
+  }
+  if (OB_SUCC(ret)) {
+    distance = sum;
+  }
+  return ret;
+}
+}
+}
\ No newline at end of file
diff --git a/src/share/vector_type/ob_vector_ip_distance.h b/src/share/vector_type/ob_vector_ip_distance.h
new file mode 100644
index 0000000000..4fbbd121d5
--- /dev/null
+++ b/src/share/vector_type/ob_vector_ip_distance.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_LIB_OB_VECTOR_IP_DISTANCE_H_
+#define OCEANBASE_LIB_OB_VECTOR_IP_DISTANCE_H_
+
+#include "lib/utility/ob_print_utils.h"
+#include "lib/oblog/ob_log.h"
+#include "lib/ob_define.h"
+#include "common/object/ob_obj_compare.h"
+
+namespace oceanbase
+{
+namespace common
+{
+struct ObVectorIpDistance
+{
+  static int ip_distance_func(const float *a, const float *b, const int64_t len, double &distance); // inner (dot) product over len elements; OB_NUMERIC_OVERFLOW once the running sum is infinite
+
+  // scalar ("normal") implementation backing ip_distance_func
+  OB_INLINE static int ip_distance_normal(const float *a, const float *b, const int64_t len, double &distance);
+  // TODO(@jingshui) add simd func
+};
+
+} // common
+} // oceanbase
+#endif
\ No newline at end of file
diff --git a/src/share/vector_type/ob_vector_l1_distance.cpp b/src/share/vector_type/ob_vector_l1_distance.cpp
new file mode 100644
index 0000000000..2937a84c0d
--- /dev/null
+++ b/src/share/vector_type/ob_vector_l1_distance.cpp
@@ -0,0 +1,41 @@
+/**
+ * Copyright (c) 2024 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#include "ob_vector_l1_distance.h"
+namespace oceanbase
+{
+namespace common
+{
+int ObVectorL1Distance::l1_distance_func(const float *a, const float *b, const int64_t len, double &distance)
+{
+  return l1_distance_normal(a, b, len, distance); // public entry point; currently always the scalar path
+}
+
+OB_INLINE int ObVectorL1Distance::l1_distance_normal(const float *a, const float *b, const int64_t len, double &distance)
+{
+  int ret = OB_SUCCESS;
+  double sum = 0; // running sum of |a[i] - b[i]|; copied to distance only when no overflow was seen
+  double diff = 0; // |a[i] - b[i]| of the element just added, kept so the overflow warning reports it
+  for (int64_t i = 0; OB_SUCC(ret) && i < len; ++i) {
+    sum += (diff = fabs(a[i] - b[i]));
+    if (OB_UNLIKELY(0 != ::isinf(sum))) {
+      ret = OB_NUMERIC_OVERFLOW;
+      LIB_LOG(WARN, "value is overflow", K(ret), K(diff), K(sum));
+    }
+  }
+  if (OB_SUCC(ret)) {
+    distance = sum;
+  }
+  return ret;
+}
+}
+}
\ No newline at end of file
diff --git a/src/share/vector_type/ob_vector_l1_distance.h b/src/share/vector_type/ob_vector_l1_distance.h
new file mode 100644
index 0000000000..afb324fb07
--- /dev/null
+++ b/src/share/vector_type/ob_vector_l1_distance.h
@@ -0,0 +1,35 @@
+/**
+ * Copyright (c) 2024 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_LIB_OB_VECTOR_L1_DISTANCE_H_
+#define OCEANBASE_LIB_OB_VECTOR_L1_DISTANCE_H_
+
+#include "lib/utility/ob_print_utils.h"
+#include "lib/oblog/ob_log.h"
+#include "lib/ob_define.h"
+#include "common/object/ob_obj_compare.h"
+
+namespace oceanbase
+{
+namespace common
+{
+struct ObVectorL1Distance
+{
+  static int l1_distance_func(const float *a, const float *b, const int64_t len, double &distance); // sum of |a[i] - b[i]| over len elements; OB_NUMERIC_OVERFLOW once the running sum is infinite
+
+  // scalar ("normal") implementation backing l1_distance_func
+  OB_INLINE static int l1_distance_normal(const float *a, const float *b, const int64_t len, double &distance);
+  // TODO(@jingshui) add simd func
+};
+} // common
+} // oceanbase
+#endif
\ No newline at end of file
diff --git a/src/share/vector_type/ob_vector_l2_distance.cpp b/src/share/vector_type/ob_vector_l2_distance.cpp
new file mode 100644
index 0000000000..d660c5a94e
--- /dev/null
+++ b/src/share/vector_type/ob_vector_l2_distance.cpp
@@ -0,0 +1,55 @@
+/**
+ * Copyright (c) 2024 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#include "ob_vector_l2_distance.h"
+namespace oceanbase
+{
+namespace common
+{
+int ObVectorL2Distance::l2_square_func(const float *a, const float *b, const int64_t len, double &square)
+{
+  return l2_square_normal(a, b, len, square); // public entry point; currently always the scalar path
+}
+
+int ObVectorL2Distance::l2_distance_func(const float *a, const float *b, const int64_t len, double &distance)
+{
+  int ret = OB_SUCCESS;
+  double square = 0;
+  distance = 0; // callers read 0 when the squared distance could not be computed
+  if (OB_FAIL(l2_square_func(a, b, len, square))) {
+    LIB_LOG(WARN, "failed to cal l2 square", K(ret));
+  } else {
+    distance = sqrt(square); // Euclidean distance = sqrt(sum of squared differences)
+  }
+  return ret;
+}
+
+OB_INLINE int ObVectorL2Distance::l2_square_normal(const float *a, const float *b, const int64_t len, double &square)
+{
+  int ret = OB_SUCCESS;
+  double sum = 0; // running sum of squared differences; copied to square only when no overflow was seen
+  double diff = 0;
+  for (int64_t i = 0; OB_SUCC(ret) && i < len; ++i) {
+    diff = a[i] - b[i];
+    sum += (diff * diff);
+    if (OB_UNLIKELY(0 != ::isinf(sum))) { // the running sum became infinite
+      ret = OB_NUMERIC_OVERFLOW;
+      LIB_LOG(WARN, "value is overflow", K(ret), K(diff), K(sum));
+    }
+  }
+  if (OB_SUCC(ret)) {
+    square = sum;
+  }
+  return ret;
+}
+}
+}
\ No newline at end of file
diff --git a/src/share/vector_type/ob_vector_l2_distance.h b/src/share/vector_type/ob_vector_l2_distance.h
new file mode 100644
index 0000000000..98d99ee783
--- /dev/null
+++ b/src/share/vector_type/ob_vector_l2_distance.h
@@ -0,0 +1,37 @@
+/**
+ * Copyright (c) 2024 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_LIB_OB_VECTOR_L2_DISTANCE_H_
+#define OCEANBASE_LIB_OB_VECTOR_L2_DISTANCE_H_
+
+#include "lib/utility/ob_print_utils.h"
+#include "lib/oblog/ob_log.h"
+#include "lib/ob_define.h"
+#include "common/object/ob_obj_compare.h"
+
+namespace oceanbase
+{
+namespace common
+{
+struct ObVectorL2Distance
+{
+  static int l2_square_func(const float *a, const float *b, const int64_t len, double &square); // sum of (a[i] - b[i])^2 over len elements
+  static int l2_distance_func(const float *a, const float *b, const int64_t len, double &distance); // sqrt of l2_square_func's result; distance is 0 on failure
+
+  // scalar ("normal") implementation backing l2_square_func
+  OB_INLINE static int l2_square_normal(const float *a, const float *b, const int64_t len, double &square);
+  // TODO(@jingshui) add simd func
+};
+
+} // common
+} // oceanbase
+#endif
\ No newline at end of file
diff --git a/src/share/vector_type/ob_vector_norm.cpp b/src/share/vector_type/ob_vector_norm.cpp
new file mode 100644
index 0000000000..6bf0c9e979
--- /dev/null
+++ b/src/share/vector_type/ob_vector_norm.cpp
@@ -0,0 +1,54 @@
+/**
+ * Copyright (c) 2024 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#include "ob_vector_norm.h"
+namespace oceanbase
+{
+namespace common
+{
+int ObVectorNorm::vector_norm_square_func(const float *a, const int64_t len, double &norm_square)
+{
+  return vector_norm_square_normal(a, len, norm_square); // public entry point; currently always the scalar path
+}
+
+int ObVectorNorm::vector_norm_func(const float *a, const int64_t len, double &norm)
+{
+  int ret = OB_SUCCESS;
+  double norm_square = 0;
+  norm = 0; // callers read 0 when the squared norm could not be computed
+  if (OB_FAIL(vector_norm_square_func(a, len, norm_square))) {
+    LIB_LOG(WARN, "failed to cal norm square", K(ret));
+  } else {
+    norm = sqrt(norm_square); // Euclidean norm = sqrt(sum of squares)
+  }
+  return ret;
+}
+
+OB_INLINE int ObVectorNorm::vector_norm_square_normal(const float *a, const int64_t len, double &norm_square)
+{
+  int ret = OB_SUCCESS;
+  double sum = 0;
+  // sum accumulates a[i]^2; norm_square is assigned only when no overflow was detected
+  for (int64_t i = 0; OB_SUCC(ret) && i < len; ++i) {
+    sum += (a[i] * a[i]);
+    if (OB_UNLIKELY(0 != ::isinf(sum))) {
+      ret = OB_NUMERIC_OVERFLOW;
+      LIB_LOG(WARN, "value is overflow", K(ret), K(sum));
+    }
+  }
+  if (OB_SUCC(ret)) {
+    norm_square = sum;
+  }
+  return ret;
+}
+}
+}
\ No newline at end of file
diff --git a/src/share/vector_type/ob_vector_norm.h b/src/share/vector_type/ob_vector_norm.h
new file mode 100644
index 0000000000..55884874d5
--- /dev/null
+++ b/src/share/vector_type/ob_vector_norm.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright (c) 2024 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_LIB_OB_VECTOR_NORM_H_
+#define OCEANBASE_LIB_OB_VECTOR_NORM_H_
+
+#include "lib/utility/ob_print_utils.h"
+#include "lib/oblog/ob_log.h"
+#include "lib/ob_define.h"
+#include "common/object/ob_obj_compare.h"
+
+namespace oceanbase
+{
+namespace common
+{
+struct ObVectorNorm
+{
+  static int vector_norm_square_func(const float *a, const int64_t len, double &norm_square); // sum of a[i]^2 over len elements
+  static int vector_norm_func(const float *a, const int64_t len, double &norm); // sqrt of vector_norm_square_func's result; norm is 0 on failure
+
+  // scalar ("normal") implementation backing vector_norm_square_func
+  OB_INLINE static int vector_norm_square_normal(const float *a, const int64_t len, double &norm_square);
+  // TODO(@jingshui) add simd func
+};
+} // common
+} // oceanbase
+#endif
\ No newline at end of file
diff --git a/src/sql/CMakeLists.txt b/src/sql/CMakeLists.txt
index e8fbdae9fb..cc2789430a 100644
--- a/src/sql/CMakeLists.txt
+++ b/src/sql/CMakeLists.txt
@@ -42,6 +42,7 @@ ob_set_subtarget(ob_sql das
   das/ob_das_delete_op.cpp
   das/ob_das_dml_ctx_define.cpp
   das/ob_domain_index_lookup_op.cpp
+  das/ob_vector_index_lookup_op.cpp
   das/ob_das_extra_data.cpp
   das/ob_das_factory.cpp
   das/ob_das_insert_op.cpp
@@ -58,6 +59,7 @@ ob_set_subtarget(ob_sql das
   das/ob_das_id_rpc.cpp
   das/ob_das_id_cache.cpp
   das/ob_das_ir_define.cpp
+  das/ob_das_vec_define.cpp
   das/ob_das_task_result.cpp
   das/ob_das_spatial_index_lookup_op.cpp
   das/ob_das_retry_ctrl.cpp
@@ -65,6 +67,7 @@ ob_set_subtarget(ob_sql das
   das/ob_das_domain_utils.cpp
   das/ob_das_attach_define.cpp
   das/ob_group_scan_iter.cpp
+  das/ob_das_dml_vec_iter.cpp
   das/iter/ob_das_iter.cpp
   das/iter/ob_das_merge_iter.cpp
   das/iter/ob_das_lookup_iter.cpp
@@ -76,6 +79,7 @@ ob_set_subtarget(ob_sql das
   das/iter/ob_das_text_retrieval_iter.cpp
   das/iter/ob_das_text_retrieval_merge_iter.cpp
   das/iter/ob_das_iter_utils.cpp
+  das/iter/ob_das_vid_merge_iter.cpp
 )
 
 ob_set_subtarget(ob_sql dtl
@@ -758,6 +762,16 @@ ob_set_subtarget(ob_sql engine_expr
   engine/expr/ob_expr_inner_row_cmp_val.cpp
   engine/expr/ob_expr_last_refresh_scn.cpp
   engine/expr/ob_expr_json_utils.cpp
+
engine/expr/ob_expr_array.cpp + engine/expr/ob_expr_vector.cpp + engine/expr/ob_array_expr_utils.cpp + engine/expr/ob_array_cast.cpp + engine/expr/ob_expr_vec_vid.cpp + engine/expr/ob_expr_vec_type.cpp + engine/expr/ob_expr_vec_vector.cpp + engine/expr/ob_expr_vec_scn.cpp + engine/expr/ob_expr_vec_key.cpp + engine/expr/ob_expr_vec_data.cpp engine/expr/ob_expr_topn_filter.cpp engine/expr/ob_expr_inner_table_option_printer.cpp engine/expr/ob_expr_rb_build_empty.cpp @@ -770,6 +784,7 @@ ob_set_subtarget(ob_sql engine_expr engine/expr/ob_expr_rb_calc.cpp engine/expr/ob_expr_rb_to_string.cpp engine/expr/ob_expr_rb_from_string.cpp + engine/expr/ob_expr_array_contains.cpp engine/expr/ob_expr_decode_trace_id.cpp engine/expr/ob_expr_split_part.cpp ) diff --git a/src/sql/code_generator/ob_dml_cg_service.cpp b/src/sql/code_generator/ob_dml_cg_service.cpp index 26955a6e95..432c96ed7c 100644 --- a/src/sql/code_generator/ob_dml_cg_service.cpp +++ b/src/sql/code_generator/ob_dml_cg_service.cpp @@ -868,6 +868,7 @@ int ObDmlCgService::generate_conflict_checker_ctdef(ObLogInsert &op, int ret = OB_SUCCESS; ObSEArray rowkey_exprs; bool is_heap_table = false; + bool need_vec_vid_merge_iter = false; // When the partition key is a virtual generated column, // the table with the primary key needs to be replaced, // and the table without the primary key does not need to be replaced @@ -881,6 +882,9 @@ int ObDmlCgService::generate_conflict_checker_ctdef(ObLogInsert &op, LOG_WARN("fail to generate data_table rowkey_expr", K(ret), K(rowkey_exprs)); } else if (OB_FAIL(generate_scan_ctdef(op, index_dml_info, conflict_checker_ctdef.das_scan_ctdef_))) { LOG_WARN("fail to generate das_scan_ctdef", K(ret)); + } else if (OB_FAIL(generate_scan_with_vec_vid_ctdef_if_need(op, index_dml_info, + conflict_checker_ctdef.das_scan_ctdef_,conflict_checker_ctdef.attach_spec_))) { + LOG_WARN("fail to generate scan with doc id ctdef if need", K(ret)); } else if (OB_FAIL(generate_constraint_infos(op, 
index_dml_info, conflict_checker_ctdef.cst_ctdefs_))) { @@ -1016,14 +1020,25 @@ int ObDmlCgService::generate_constraint_infos(ObLogInsert &op, return ret; } -int ObDmlCgService::generate_access_exprs(const common::ObIArray &columns, - common::ObIArray &access_exprs) +int ObDmlCgService::generate_access_exprs( + const common::ObIArray &columns, + const ObLogicalOperator &op, + const bool need_vec_vid, + const uint64_t vec_vid_col_id, + common::ObIArray &access_exprs, + common::ObIArray &vec_vid_expr) { int ret = OB_SUCCESS; for (int64_t i = 0; OB_SUCC(ret) && i < columns.count(); ++i) { + uint64_t base_cid = OB_INVALID_ID; ObRawExpr *expr = columns.at(i); - if (expr->is_column_ref_expr() && - static_cast(expr)->is_virtual_generated_column()) { + if (OB_FAIL(get_column_ref_base_cid(op, columns.at(i), base_cid))) { + LOG_WARN("get base column id failed", K(ret), K(i), K(columns.at(i))); + } else if (need_vec_vid && vec_vid_col_id == base_cid) { + if (OB_FAIL(add_var_to_array_no_dup(vec_vid_expr, expr))) { + LOG_WARN("failed to add param expr", K(ret)); + } + } else if (expr->is_column_ref_expr() && static_cast(expr)->is_virtual_generated_column()) { // do nothing. 
} else { if (OB_FAIL(add_var_to_array_no_dup(access_exprs, expr))) { @@ -1041,12 +1056,17 @@ int ObDmlCgService::generate_scan_ctdef(ObLogInsert &op, int ret = OB_SUCCESS; ObSEArray access_exprs; ObSEArray dep_exprs; + ObSEArray vec_vid_raw_expr; + ObSEArray vec_vid_expr; + ObSEArray tsc_col_ids; ObSqlSchemaGuard *schema_guard = NULL; const ObTableSchema *table_schema = NULL; + bool need_vec_vid = false; uint64_t ref_table_id = index_dml_info.ref_table_id_; // 主表的index_tid_和ref_table_id_都是一样的 scan_ctdef.ref_table_id_ = ref_table_id; const uint64_t tenant_id = MTL_ID(); + uint64_t vec_vid_col_id = OB_INVALID_ID; if (OB_ISNULL(op.get_plan()) || OB_ISNULL(schema_guard = op.get_plan()->get_optimizer_context().get_sql_schema_guard()) || OB_ISNULL(schema_guard->get_schema_guard())) { @@ -1054,15 +1074,21 @@ int ObDmlCgService::generate_scan_ctdef(ObLogInsert &op, LOG_ERROR("get unexpected null", K(schema_guard), K(ret)); } else if (OB_FAIL(schema_guard->get_table_schema(ref_table_id, table_schema))) { LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_FAIL(check_need_vec_vid_merge_iter(op, ref_table_id, need_vec_vid))) { + LOG_WARN("fail to check need vec vid merge iter", K(ret), K(ref_table_id)); } else if (OB_FAIL(schema_guard->get_schema_guard()->get_schema_version( TABLE_SCHEMA, tenant_id, ref_table_id, scan_ctdef.schema_version_))) { LOG_WARN("fail to get schema version", K(ret), K(tenant_id), K(ref_table_id)); - } else if (OB_FAIL(generate_access_exprs(index_dml_info.column_exprs_, access_exprs))) { + } else if (need_vec_vid && OB_FAIL(table_schema->get_vec_index_vid_col_id(vec_vid_col_id))) { + LOG_WARN("fail to get vec vid column id", K(ret), KPC(table_schema)); + } else if (OB_FAIL(generate_access_exprs(index_dml_info.column_exprs_, op, need_vec_vid, vec_vid_col_id, access_exprs, vec_vid_raw_expr))) { LOG_WARN("fail to generate access exprs ", K(ret)); } else if (OB_FAIL(cg_.generate_rt_exprs(access_exprs, scan_ctdef.pd_expr_spec_.access_exprs_))) { 
LOG_WARN("fail to generate rt exprs ", K(ret));
-  } else if (OB_FAIL(scan_ctdef.access_column_ids_.init(index_dml_info.column_exprs_.count()))) {
+  } else if (OB_FAIL(cg_.generate_rt_exprs(vec_vid_raw_expr, vec_vid_expr))) {
+    LOG_WARN("fail to generate doc id rt exprs", K(ret), K(vec_vid_raw_expr));
+  } else if (OB_FAIL(scan_ctdef.access_column_ids_.init(access_exprs.count()))) {
     LOG_WARN("fail to init output_column_ids_ ", K(ret));
   } else {
     ARRAY_FOREACH(index_dml_info.column_exprs_, i) {
@@ -1071,10 +1097,16 @@ int ObDmlCgService::generate_scan_ctdef(ObLogInsert &op,
       if (OB_ISNULL(item)) {
         ret = OB_ERR_UNEXPECTED;
         LOG_WARN("invalid column item", K(i), K(item));
-      } else if (item->is_virtual_generated_column() && !item->is_xml_column()) {
+      } else if (item->is_virtual_generated_column() && !item->is_xml_column() && !item->is_vec_vid_column()) {
         // do nothing.
       } else if (OB_FAIL(get_column_ref_base_cid(op, item, base_cid))) {
         LOG_WARN("get base column id failed", K(ret), K(item));
+      } else if (OB_FAIL(tsc_col_ids.push_back(base_cid))) {
+        LOG_WARN("fail to push back column id", K(ret));
+      } else if (vec_vid_col_id == base_cid) {
+        if (need_vec_vid) {
+          scan_ctdef.vec_vid_idx_ = tsc_col_ids.count() - 1;
+        }
       } else if (OB_FAIL(scan_ctdef.access_column_ids_.push_back(base_cid))) {
         LOG_WARN("fail to add column id", K(ret));
       }
@@ -1092,7 +1124,8 @@ int ObDmlCgService::generate_scan_ctdef(ObLogInsert &op,
                                        op.get_type(),
                                        false))) {
     LOG_WARN("generate calc exprs failed", K(ret));
-  } else if (OB_FAIL(cg_.tsc_cg_service_.generate_das_result_output(scan_ctdef.access_column_ids_,
+  } else if (OB_FAIL(cg_.tsc_cg_service_.generate_das_result_output(tsc_col_ids,
+                                                                    vec_vid_expr,
                                                                     scan_ctdef,
                                                                     nullptr))) {
     LOG_WARN("generate das result output failed", K(ret));
@@ -1275,6 +1308,50 @@ int ObDmlCgService::add_geo_col_projector(const ObIArray &cur_row,
   return ret;
 }
 
+template
+int ObDmlCgService::add_vec_idx_col_projector(const ObIArray &cur_row,
+                                              const ObIArray &full_row,
+                                              const ObIArray &dml_column_ids,
+                                              ObDASDMLBaseCtDef &das_ctdef,
+                                              IntFixedArray &row_projector)
+{
+  int ret = OB_SUCCESS;
+  // for vec vid, need to set new_row to VEC_VID expr
+  int64_t column_idx = OB_INVALID_INDEX; // position of the vid column inside dml_column_ids
+  int64_t projector_idx = OB_INVALID_INDEX; // position of the T_FUN_SYS_VEC_VID expr inside full_row
+  int64_t pre_projector_idx = OB_INVALID_INDEX; // position of the vid column's own expr inside full_row
+  uint64_t vid_cid = das_ctdef.table_param_.get_data_table().get_vec_id_col_id();
+  if (vid_cid != OB_INVALID_ID) { // nothing is remapped when the table reports no vid column
+    for (int64_t i = 0; OB_SUCC(ret) && i < full_row.count(); ++i) { // find the first VEC_VID expr in full_row
+      if (full_row.at(i)->get_expr_type() == T_FUN_SYS_VEC_VID) {
+        projector_idx = i;
+        break;
+      }
+    }
+    if (projector_idx == OB_INVALID_INDEX) {
+      // do nothing, only update primary key will not change vid, maybe not exist
+    } else if (has_exist_in_array(dml_column_ids, vid_cid, &column_idx)) {
+      ObRawExpr *column_expr = cur_row.at(column_idx); // presumably cur_row is ordered like dml_column_ids -- TODO confirm with callers
+      if (!has_exist_in_array(full_row, column_expr, &pre_projector_idx)) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("row column not found in full row columns", K(ret),
+                 K(column_idx), KPC(cur_row.at(column_idx)));
+      } else {
+        for (int64_t i = 0; OB_SUCC(ret) && i < row_projector.count(); ++i) { // only the first matching slot is redirected
+          if (row_projector.at(i) == pre_projector_idx) {
+            // replace vid col ref to VEC_VID for new row
+            row_projector.at(i) = projector_idx;
+            LOG_DEBUG("succeed do replace col ref to vec_vid expr for new row", K(i), K(pre_projector_idx),
+                      K(projector_idx), K(vid_cid), K(full_row));
+            break;
+          }
+        }
+      }
+    }
+  }
+  return ret;
+}
+
 int ObDmlCgService::append_all_pk_column_id(ObSchemaGetterGuard *schema_guard,
                                             const ObTableSchema *table_schema,
                                             ObIArray &minimal_column_ids)
@@ -1875,6 +1952,8 @@ int ObDmlCgService::generate_das_projector(const ObIArray &dml_column_
   int ret = OB_SUCCESS;
   IntFixedArray &old_row_projector = das_ctdef.old_row_projector_;
   IntFixedArray &new_row_projector = das_ctdef.new_row_projector_;
+  bool is_vec_vid_index = das_ctdef.table_param_.get_data_table().is_vector_index()
+                          && das_ctdef.op_type_ == DAS_OP_TABLE_UPDATE;
   bool is_spatial_index =
das_ctdef.table_param_.get_data_table().is_spatial_index();
   uint8_t extra_geo = (is_spatial_index) ? 1 : 0;
   //generate old row projector
@@ -1957,6 +2036,10 @@ int ObDmlCgService::generate_das_projector(const ObIArray &dml_column_
                                         das_ctdef, new_row_projector))) {
         LOG_WARN("add geo column projector failed", K(ret));
       }
+      if (OB_SUCC(ret) && is_vec_vid_index &&
+          OB_FAIL(add_vec_idx_col_projector(new_row, full_row, dml_column_ids, das_ctdef, new_row_projector))) {
+        LOG_WARN("add vec idx column for new projector failed", K(ret));
+      }
     }
     LOG_TRACE("print dml_column_ids", K(dml_column_ids), K(storage_column_ids),
@@ -2034,6 +2117,7 @@ int ObDmlCgService::generate_das_dml_ctdef(ObLogDelUpd &op,
   das_dml_ctdef.index_tid_ = index_tid;
   das_dml_ctdef.is_ignore_ = op.is_ignore();
   das_dml_ctdef.is_batch_stmt_ = op.get_plan()->get_optimizer_context().is_batched_multi_stmt();
+  das_dml_ctdef.is_access_vidx_as_master_table_ = false;
   ObSQLSessionInfo *session = nullptr;
   int64_t binlog_row_image = ObBinlogRowImage::FULL;
   if (OB_FAIL(convert_dml_column_info(index_tid, false, das_dml_ctdef))) {
@@ -2078,6 +2162,11 @@ int ObDmlCgService::generate_das_dml_ctdef(ObLogDelUpd &op,
     }
   }
 #endif
+  if (OB_FAIL(ret)) {
+  } else if (das_dml_ctdef.table_param_.get_data_table().is_vector_index() &&
+             0 == index_dml_info.related_index_ids_.count()) {
+    das_dml_ctdef.is_access_vidx_as_master_table_ = true;
+  }
   return ret;
 }
 
@@ -3979,5 +4068,196 @@ int ObDmlCgService::init_encrypt_table_meta_(
   return ret;
 }
 #endif
+
+// Reports through need_vec_vid_merge_iter whether table ref_table_id has a vector index, as
+// answered by ObTableSchema::check_has_vector_index(). The output is reset to false before any
+// lookup is attempted.
+int ObDmlCgService::check_need_vec_vid_merge_iter(
+    ObLogicalOperator &op,
+    const uint64_t ref_table_id,
+    bool &need_vec_vid_merge_iter)
+{
+  int ret = OB_SUCCESS;
+  ObLogPlan *log_plan = op.get_plan();
+  ObSchemaGetterGuard *schema_guard = nullptr;
+  const ObTableSchema *table_schema = nullptr;
+  need_vec_vid_merge_iter = false;
+  if (OB_ISNULL(log_plan) ||
+      OB_ISNULL(schema_guard = log_plan->get_optimizer_context().get_schema_guard())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected status", K(ret));
+  } else if (OB_FAIL(schema_guard->get_table_schema(MTL_ID(), ref_table_id, table_schema))) {
+    LOG_WARN("get table schema failed", K(ref_table_id), K(ret));
+  } else if (OB_ISNULL(table_schema)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("table schema is null", K(ret), K(table_schema));
+  } else if (OB_FAIL(table_schema->check_has_vector_index(*schema_guard, need_vec_vid_merge_iter))) {
+    LOG_WARN("fail to check has vector index", K(ret), KPC(table_schema));
+  } else if (need_vec_vid_merge_iter) {
+    LOG_TRACE("has vector index, need vec vid merge iter", K(ret), K(ref_table_id));
+  }
+  return ret;
+}
+
+// When the table has a vector index, allocates a DAS_OP_VID_MERGE ctdef whose two children are
+// scan_ctdef (child 0) and a scan of the rowkey-vid table (child 1), copies scan_ctdef's result
+// outputs onto it and publishes it through attach_spec.attach_ctdef_. Otherwise does nothing.
+int ObDmlCgService::generate_scan_with_vec_vid_ctdef_if_need(
+    ObLogInsert &op,
+    const IndexDMLInfo &index_dml_info,
+    ObDASScanCtDef &scan_ctdef,
+    ObDASAttachSpec &attach_spec)
+{
+  int ret = OB_SUCCESS;
+  bool need_vec_vid_merge_iter = false;
+  ObArray result_outputs;
+  ObDASVIdMergeCtDef *vec_vid_merge_ctdef = nullptr;
+  ObDASScanCtDef *rowkey_vid_scan_ctdef = nullptr;
+  if (OB_FAIL(check_need_vec_vid_merge_iter(op, index_dml_info.ref_table_id_, need_vec_vid_merge_iter))) {
+    LOG_WARN("fail to check need vec vid merge iter", K(ret));
+  } else if (!need_vec_vid_merge_iter) {
+    // just skip, nothing to do
+  } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_VID_MERGE, cg_.phy_plan_->get_allocator(),
+      vec_vid_merge_ctdef))) {
+    LOG_WARN("fail to allocate to vec vid merge ctdef", K(ret));
+  } else if (OB_ISNULL(vec_vid_merge_ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &cg_.phy_plan_->get_allocator(), 2))) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("fail to allocate vec vid merge ctdef child array memory", K(ret));
+  } else if (OB_FAIL(generate_rowkey_vid_ctdef(op, index_dml_info, attach_spec, rowkey_vid_scan_ctdef))) {
+    LOG_WARN("fail to generate rowkey vid ctdef", K(ret));
+  } else if (OB_FAIL(result_outputs.assign(scan_ctdef.result_output_))) {
+    LOG_WARN("fail to copy scan result outputs", K(ret));
+  } else if (OB_UNLIKELY(result_outputs.empty())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected error, result outputs is empty", K(ret));
+  } else {
+    vec_vid_merge_ctdef->children_cnt_ = 2;
+    vec_vid_merge_ctdef->children_[0] = &scan_ctdef;
+    vec_vid_merge_ctdef->children_[1] = rowkey_vid_scan_ctdef;
+    if (OB_FAIL(vec_vid_merge_ctdef->result_output_.assign(result_outputs))) {
+      LOG_WARN("fail to assign result output", K(ret));
+    } else {
+      attach_spec.attach_ctdef_ = static_cast(vec_vid_merge_ctdef);
+    }
+  }
+  return ret;
+}
+
+// Builds the table-scan ctdef for the rowkey-vid table of index_dml_info.ref_table_id_ and returns
+// it through rowkey_vid_scan_ctdef (assigned only on success). The location meta created for that
+// scan is appended to attach_spec.attach_loc_metas_.
+int ObDmlCgService::generate_rowkey_vid_ctdef(
+    ObLogInsert &op,
+    const IndexDMLInfo &index_dml_info,
+    ObDASAttachSpec &attach_spec,
+    ObDASScanCtDef *&rowkey_vid_scan_ctdef)
+{
+  int ret = OB_SUCCESS;
+  const ObTableSchema *data_schema = nullptr;
+  const ObTableSchema *rowkey_vid_schema = nullptr;
+  ObDASScanCtDef *scan_ctdef = nullptr;
+  ObSqlSchemaGuard *schema_guard = nullptr;
+  ObDASTableLocMeta *loc_meta = nullptr;
+  uint64_t rowkey_vid_tid = OB_INVALID_ID;
+
+  // opt_ctx_ is checked before it is dereferenced, so a missing context is reported, not a crash
+  if (OB_ISNULL(cg_.opt_ctx_) || OB_ISNULL(schema_guard = cg_.opt_ctx_->get_sql_schema_guard())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected error, schema guard is nullptr", K(ret), KP(cg_.opt_ctx_));
+  } else if (OB_FAIL(schema_guard->get_table_schema(index_dml_info.ref_table_id_, data_schema))) {
+    LOG_WARN("get table schema failed", K(ret), K(index_dml_info.ref_table_id_));
+  } else if (OB_ISNULL(data_schema)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("failed to get data table schema", K(ret));
+  } else if (OB_FAIL(data_schema->get_rowkey_vid_tid(rowkey_vid_tid))) {
+    LOG_WARN("failed to get rowkey vid tid", K(ret), KPC(data_schema));
+  } else if (OB_FAIL(schema_guard->get_table_schema(index_dml_info.ref_table_id_,
+                                                    rowkey_vid_tid,
+                                                    op.get_stmt(),
+                                                    rowkey_vid_schema))) {
+    LOG_WARN("get table schema failed", K(ret), K(rowkey_vid_tid));
+  } else if (OB_ISNULL(rowkey_vid_schema)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("failed to get rowkey vid schema", K(ret), K(rowkey_vid_tid));
+  } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, cg_.phy_plan_->get_allocator(), scan_ctdef))) {
+    LOG_WARN("alloc das ctdef failed", K(ret));
+  } else if (OB_FAIL(generate_rowkey_vid_access_expr(index_dml_info.column_exprs_,
+                                                     *rowkey_vid_schema,
+                                                     scan_ctdef))) {
+    LOG_WARN("fail to generate rowkey vid access expr", K(ret), K(index_dml_info));
+  } else if (OB_ISNULL(loc_meta = OB_NEWx(ObDASTableLocMeta, &cg_.phy_plan_->get_allocator(), cg_.phy_plan_->get_allocator()))) {
+    ret = OB_ALLOCATE_MEMORY_FAILED;
+    LOG_WARN("allocate rowkey vid scan location meta failed", K(ret));
+  } else {
+    scan_ctdef->ref_table_id_ = rowkey_vid_tid;
+    loc_meta->table_loc_id_ = index_dml_info.loc_table_id_;
+    loc_meta->ref_table_id_ = rowkey_vid_tid;
+    loc_meta->select_leader_ = 1;
+    loc_meta->is_dup_table_ = (ObDuplicateScope::DUPLICATE_SCOPE_NONE != rowkey_vid_schema->get_duplicate_scope());
+    loc_meta->unuse_related_pruning_ = (OB_PHY_PLAN_DISTRIBUTED == cg_.opt_ctx_->get_phy_plan_type()
+                                        && !cg_.opt_ctx_->get_root_stmt()->is_insert_stmt());
+    loc_meta->is_external_table_ = rowkey_vid_schema->is_external_table();
+    loc_meta->is_external_files_on_disk_ =
+        ObSQLUtils::is_external_files_on_local_disk(rowkey_vid_schema->get_external_file_location());
+    scan_ctdef->table_param_.get_enable_lob_locator_v2()
+        = (cg_.get_cur_cluster_version() >= CLUSTER_VERSION_4_1_0_0);
+    scan_ctdef->schema_version_ = rowkey_vid_schema->get_schema_version();
+    ObSEArray vid_id_expr;
+    if (OB_FAIL(attach_spec.attach_loc_metas_.push_back(loc_meta))) {
+      LOG_WARN("store scan loc meta failed", K(ret));
+    } else if (OB_FAIL(scan_ctdef->table_param_.convert(*rowkey_vid_schema, scan_ctdef->access_column_ids_,
+                                                        scan_ctdef->pd_expr_spec_.pd_storage_flag_))) {
+      LOG_WARN("fail to convert table param", K(ret));
+    } else if (OB_FAIL(cg_.tsc_cg_service_.generate_das_result_output(scan_ctdef->access_column_ids_,
+                                                                       vid_id_expr,
+                                                                       *scan_ctdef,
+                                                                       nullptr))) {
+      LOG_WARN("fail to generate das result output", K(ret));
+    } else {
+      rowkey_vid_scan_ctdef = scan_ctdef;
+    }
+  }
+  return ret;
+}
+
+// Fills ctdef's access column ids and access exprs with the entries of `columns` whose column id
+// also exists in the rowkey_vid schema. A null ctdef or an empty column list is rejected.
+int ObDmlCgService::generate_rowkey_vid_access_expr(
+    const common::ObIArray &columns,
+    const ObTableSchema &rowkey_vid,
+    ObDASScanCtDef *ctdef)
+{
+  int ret = OB_SUCCESS;
+  ObSEArray access_exprs;
+  ObArray rowkey_vid_column_ids;
+  if (OB_ISNULL(ctdef) || OB_UNLIKELY(columns.count() <= 0)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), KP(ctdef), K(columns));
+  } else if (OB_FAIL(rowkey_vid.get_column_ids(rowkey_vid_column_ids))) {
+    LOG_WARN("fail to get column ids from rowkey vid", K(ret), K(rowkey_vid));
+  } else if (OB_FAIL(ctdef->access_column_ids_.init(rowkey_vid_column_ids.count()))) {
+    LOG_WARN("fail to init output_column_ids_ ", K(ret));
+  } else {
+    for (int64_t i = 0; OB_SUCC(ret) && i < columns.count(); ++i) {
+      ObColumnRefRawExpr *expr = columns.at(i);
+      if (OB_ISNULL(expr)) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected error, expr is nullptr", K(ret), K(i), K(columns));
+      } else if (has_exist_in_array(rowkey_vid_column_ids, expr->get_column_id())) {
+        if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(expr)))) {
+          LOG_WARN("failed to add param expr", K(ret));
+        } else if (OB_FAIL(ctdef->access_column_ids_.push_back(expr->get_column_id()))) {
+          LOG_WARN("fail to push back column id", K(ret));
+        }
+      }
+    }
+    if (FAILEDx(cg_.generate_rt_exprs(access_exprs, ctdef->pd_expr_spec_.access_exprs_))) {
+      LOG_WARN("fail to generate rt exprs", K(ret));
+    }
+  }
+  LOG_TRACE("generate rowkey vid access expr", K(ret), K(access_exprs), K(rowkey_vid_column_ids), K(columns));
+  return ret;
+}
+
 } // namespace sql
 } // namespace oceanbase
diff --git a/src/sql/code_generator/ob_dml_cg_service.h b/src/sql/code_generator/ob_dml_cg_service.h
index 40286391b8..75bfd7e657 100644
--- a/src/sql/code_generator/ob_dml_cg_service.h
+++ b/src/sql/code_generator/ob_dml_cg_service.h
@@ -236,6 +236,12 @@
private: uint32_t proj_idx, ObDASDMLBaseCtDef &das_ctdef, IntFixedArray &row_projector); + template + int add_vec_idx_col_projector(const ObIArray &cur_row, + const ObIArray &full_row, + const ObIArray &dml_column_ids, + ObDASDMLBaseCtDef &das_ctdef, + IntFixedArray &row_projector); int fill_multivalue_extra_info_on_table_param( share::schema::ObSchemaGetterGuard *guard, const ObTableSchema *index_schema, @@ -388,7 +394,25 @@ private: const common::ObIArray &new_row, DASInsCtDefArray &ins_ctdefs); int generate_access_exprs(const common::ObIArray &columns, - common::ObIArray &access_exprs); + const ObLogicalOperator &op, + const bool need_vec_vid, + const uint64_t vec_vid_col_id, + common::ObIArray &access_exprs, + common::ObIArray &vec_vid_expr); + int generate_scan_with_vec_vid_ctdef_if_need(ObLogInsert &op, + const IndexDMLInfo &index_dml_info, + ObDASScanCtDef &scan_ctdef, + ObDASAttachSpec &attach_spec); + int generate_rowkey_vid_ctdef(ObLogInsert &op, + const IndexDMLInfo &index_dml_info, + ObDASAttachSpec &attach_spec, + ObDASScanCtDef *&rowkey_vid_scan_ctdef); + int generate_rowkey_vid_access_expr(const common::ObIArray &columns, + const ObTableSchema &rowkey_vid, + ObDASScanCtDef *ctdef); + int check_need_vec_vid_merge_iter(ObLogicalOperator &op, + const uint64_t ref_table_id, + bool &need_vec_vid_merge_iter); private: int need_fire_update_event(const ObTableSchema &table_schema, const ObString &update_events, diff --git a/src/sql/code_generator/ob_static_engine_cg.cpp b/src/sql/code_generator/ob_static_engine_cg.cpp index 17fe0c9af3..55e571697a 100644 --- a/src/sql/code_generator/ob_static_engine_cg.cpp +++ b/src/sql/code_generator/ob_static_engine_cg.cpp @@ -931,7 +931,8 @@ int ObStaticEngineCG::generate_calc_exprs( && T_PSEUDO_EXTERNAL_FILE_URL != raw_expr->get_expr_type() && T_PSEUDO_PARTITION_LIST_COL != raw_expr->get_expr_type() && !(raw_expr->is_const_expr() || raw_expr->has_flag(IS_DYNAMIC_USER_VARIABLE)) - && !(T_FUN_SYS_PART_HASH == 
raw_expr->get_expr_type() || T_FUN_SYS_PART_KEY == raw_expr->get_expr_type())) { + && !(T_FUN_SYS_PART_HASH == raw_expr->get_expr_type() || T_FUN_SYS_PART_KEY == raw_expr->get_expr_type()) + && !(T_FUN_SYS_L2_DISTANCE == raw_expr->get_expr_type())) { if (raw_expr->is_calculated()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("expr is not from the child_op_output but it has been caculated already", @@ -1133,6 +1134,9 @@ int ObStaticEngineCG::generate_merge_distinct_spec( } else if (OB_UNLIKELY(ObRoaringBitmapType == raw_expr->get_data_type())) { ret = OB_ERR_INVALID_TYPE_FOR_OP; LOG_WARN("select distinct roaringbitmap not allowed", K(ret)); + } else if (OB_UNLIKELY(ObCollectionSQLType == raw_expr->get_data_type())) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("select distinct array not allowed", K(ret)); } else if (raw_expr->is_const_expr()) { // distinct const value, 这里需要注意:distinct 1被跳过了, // 但ObMergeDistinct中,如果没有distinct列,则默认所有值都相等,这个语义正好是符合预期的。 @@ -1226,6 +1230,9 @@ int ObStaticEngineCG::generate_spec( } else if (OB_UNLIKELY(ObRoaringBitmapType == raw_expr->get_data_type())) { ret = OB_ERR_INVALID_TYPE_FOR_OP; LOG_WARN("select distinct roaringbitmap not allowed", K(ret)); + } else if (OB_UNLIKELY(ObCollectionSQLType == raw_expr->get_data_type())) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("select distinct array not allowed", K(ret)); } else if (raw_expr->is_const_expr()) { // distinct const value, 这里需要注意:distinct 1被跳过了, // 但ObMergeDistinct中,如果没有distinct列,则默认所有值都相等,这个语义正好是符合预期的。 @@ -1344,6 +1351,9 @@ int ObStaticEngineCG::generate_spec(ObLogDistinct &op, ObHashDistinctVecSpec &sp } else if (OB_UNLIKELY(ObRoaringBitmapType == raw_expr->get_data_type())) { ret = OB_ERR_INVALID_TYPE_FOR_OP; LOG_WARN("select distinct roaringbitmap not allowed", K(ret)); + } else if (OB_UNLIKELY(ObCollectionSQLType == raw_expr->get_data_type())) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("select distinct array not allowed", K(ret)); } else if (is_oracle_mode() && 
OB_UNLIKELY(ObGeometryType == raw_expr->get_data_type())) { ret = OB_ERR_COMPARE_VARRAY_LOB_ATTR; LOG_WARN("select distinct geometry not allowed", K(ret)); @@ -2159,6 +2169,9 @@ int ObStaticEngineCG::fill_sort_funcs( } else if (OB_UNLIKELY(ObRoaringBitmapType == expr->datum_meta_.type_)) { ret = OB_ERR_INVALID_TYPE_FOR_OP; LOG_WARN("order by roaringbitmap not allowed", K(ret)); + } else if (OB_UNLIKELY(ObCollectionSQLType == expr->datum_meta_.type_)) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("order by collection not allowed", K(ret)); } else { ObSortCmpFunc cmp_func; cmp_func.cmp_func_ = ObDatumFuncs::get_nullsafe_cmp_func(expr->datum_meta_.type_, @@ -4818,6 +4831,9 @@ int ObStaticEngineCG::generate_spec(ObLogGroupBy &op, ObMergeGroupBySpec &spec, if (ObRoaringBitmapType == raw_expr->get_data_type()) { ret = OB_ERR_INVALID_TYPE_FOR_OP; LOG_WARN("group by roaringbitmap not allowed", K(ret)); + } else if (ObCollectionSQLType == raw_expr->get_data_type()) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("group by collection not allowed", K(ret)); } else if (OB_FAIL(generate_rt_expr(*raw_expr, expr))) { LOG_WARN("failed to generate_rt_expr", K(ret)); } else if (OB_FAIL(spec.add_group_expr(expr))) { @@ -5087,6 +5103,9 @@ int ObStaticEngineCG::generate_spec(ObLogGroupBy &op, ObHashGroupBySpec &spec, if (ObRoaringBitmapType == raw_expr->get_data_type()) { ret = OB_ERR_INVALID_TYPE_FOR_OP; LOG_WARN("group by roaringbitmap not allowed", K(ret)); + } else if (ObCollectionSQLType == raw_expr->get_data_type()) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("group by collection not allowed", K(ret)); } else if (OB_FAIL(generate_rt_expr(*raw_expr, expr))) { LOG_WARN("failed to generate_rt_expr", K(ret)); } else if (OB_FAIL(spec.add_group_expr(expr))) { diff --git a/src/sql/code_generator/ob_static_engine_expr_cg.cpp b/src/sql/code_generator/ob_static_engine_expr_cg.cpp index 157325a856..9135a8a866 100644 --- a/src/sql/code_generator/ob_static_engine_expr_cg.cpp +++ 
b/src/sql/code_generator/ob_static_engine_expr_cg.cpp @@ -250,6 +250,37 @@ int ObStaticEngineExprCG::cg_exprs(const ObIArray &raw_exprs, return ret; } +int ObStaticEngineExprCG::init_attr_expr(ObExpr *rt_expr, ObRawExpr *raw_expr) +{ + int ret = OB_SUCCESS; + rt_expr->attrs_cnt_ = raw_expr->get_attr_count(); + // init attrs_; + if (rt_expr->attrs_cnt_ > 0) { + int64_t alloc_size = rt_expr->attrs_cnt_ * sizeof(ObExpr *); + ObExpr **buf = static_cast(allocator_.alloc(alloc_size)); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc memory", K(ret)); + } else { + memset(buf, 0, alloc_size); + rt_expr->attrs_ = buf; + for (int64_t i = 0; OB_SUCC(ret) && i < raw_expr->get_attr_count(); i++) { + ObRawExpr *child_expr = NULL; + if (OB_ISNULL(child_expr = raw_expr->get_attr_expr(i))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret)); + } else if (OB_ISNULL(get_rt_expr(*child_expr))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("expr is null", K(ret)); + } else { + rt_expr->attrs_[i] = get_rt_expr(*child_expr); + } + } + } + } + return ret; +} + // init type_, datum_meta_, obj_meta_, obj_datum_map_, args_, arg_cnt_, op_ int ObStaticEngineExprCG::cg_expr_basic(const ObIArray &raw_exprs) { @@ -286,6 +317,8 @@ int ObStaticEngineExprCG::cg_expr_basic(const ObIArray &raw_exprs) if (result_meta.is_xml_sql_type()) { // set xml subschema id = ObXMLSqlType rt_expr->datum_meta_.cs_type_ = CS_TYPE_INVALID; + } else if (result_meta.is_collection_sql_type()) { + rt_expr->datum_meta_.cs_type_ = static_cast(result_meta.get_subschema_id()); } if (is_lob_storage(rt_expr->obj_meta_.get_type())) { if (cur_cluster_version_ >= CLUSTER_VERSION_4_1_0_0) { @@ -368,6 +401,9 @@ int ObStaticEngineExprCG::cg_expr_basic(const ObIArray &raw_exprs) } } } + if (result_meta.is_collection_sql_type() && OB_FAIL(init_attr_expr(rt_expr, raw_expr))) { + LOG_WARN("failed to init attr expr", K(ret), K(raw_expr), K(rt_expr)); + } if (OB_SUCC(ret) && 
raw_expr->get_local_session_var().get_var_count() > 0) { rt_expr->local_session_var_id_ = raw_expr->get_local_session_var_id(); } diff --git a/src/sql/code_generator/ob_static_engine_expr_cg.h b/src/sql/code_generator/ob_static_engine_expr_cg.h index cd3de81dfb..29ef2f850f 100644 --- a/src/sql/code_generator/ob_static_engine_expr_cg.h +++ b/src/sql/code_generator/ob_static_engine_expr_cg.h @@ -191,6 +191,8 @@ private: // row_dimension_, op_ int cg_expr_basic(const common::ObIArray &raw_exprs); + int init_attr_expr(ObExpr *rt_expr, ObRawExpr *raw_expr); + // init parent_cnt_, parents_ int cg_expr_parents(const common::ObIArray &raw_exprs); diff --git a/src/sql/code_generator/ob_tsc_cg_service.cpp b/src/sql/code_generator/ob_tsc_cg_service.cpp index 83e36724f4..64bcfaa50e 100644 --- a/src/sql/code_generator/ob_tsc_cg_service.cpp +++ b/src/sql/code_generator/ob_tsc_cg_service.cpp @@ -16,6 +16,7 @@ #include "sql/engine/table/ob_table_scan_op.h" #include "sql/engine/basic/ob_pushdown_filter.h" #include "share/inner_table/ob_inner_table_schema.h" +#include "src/share/vector_index/ob_vector_index_util.h" namespace oceanbase { using namespace common; @@ -101,7 +102,7 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc if (OB_SUCC(ret)) { bool has_rowscn = false; scan_ctdef.ref_table_id_ = op.get_real_index_table_id(); - if (op.is_text_retrieval_scan()) { + if (op.is_text_retrieval_scan() || op.is_vec_idx_scan()) { scan_ctdef.ir_scan_type_ = ObTSCIRScanType::OB_IR_INV_IDX_SCAN; } if (OB_FAIL(generate_das_scan_ctdef(op, scan_ctdef, has_rowscn))) { @@ -188,6 +189,14 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc } } + if (OB_SUCC(ret) && op.is_vec_idx_scan()) { + if (OB_FAIL(generate_vec_ir_ctdef(op, tsc_ctdef, root_ctdef))) { + LOG_WARN("failed to generate vec ir ctdef", K(ret)); + } else { + need_attach = true; + } + } + if (OB_SUCC(ret) && op.get_index_back()) { ObDASTableLookupCtDef *lookup_ctdef = 
nullptr; if (OB_FAIL(generate_table_lookup_ctdef(op, tsc_ctdef, root_ctdef, lookup_ctdef))) { @@ -197,6 +206,19 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc } } + ObDASVIdMergeCtDef *vid_merge_ctdef = nullptr; + if (OB_SUCC(ret) && op.is_tsc_with_vid()) { + if (OB_UNLIKELY(root_ctdef != &scan_ctdef)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, root ctdef isn't equal to scan ctdef", K(ret)); + } else if (OB_FAIL(generate_das_scan_ctdef_with_vid(op, tsc_ctdef, &scan_ctdef, vid_merge_ctdef))) { + LOG_WARN("fail to generate das scan ctdef with doc id", K(ret)); + } else { + root_ctdef = vid_merge_ctdef; + need_attach = true; + } + } + if (OB_SUCC(ret) && need_attach) { tsc_ctdef.lookup_ctdef_ = nullptr; tsc_ctdef.lookup_loc_meta_ = nullptr; @@ -238,6 +260,7 @@ int ObTscCgService::generate_table_param(const ObLogTableScan &op, } else if (table_schema->is_spatial_index() && FALSE_IT(scan_ctdef.table_param_.set_is_spatial_index(true))) { } else if (table_schema->is_fts_index() && FALSE_IT(scan_ctdef.table_param_.set_is_fts_index(true))) { } else if (table_schema->is_multivalue_index_aux() && FALSE_IT(scan_ctdef.table_param_.set_is_multivalue_index(true))) { + } else if (table_schema->is_vec_index() && FALSE_IT(scan_ctdef.table_param_.set_is_vec_index(true))) { } else if (OB_FAIL(extract_das_output_column_ids(op, scan_ctdef, *table_schema, tsc_out_cols))) { LOG_WARN("extract tsc output column ids failed", K(ret)); } else if (OB_FAIL(session_info->get_sys_variable(SYS_VAR_OB_ROUTE_POLICY, route_policy))) { @@ -270,6 +293,7 @@ int ObTscCgService::generate_table_param(const ObLogTableScan &op, } int ObTscCgService::generate_das_result_output(const ObIArray &output_cids, + common::ObIArray &vec_vid_expr, ObDASScanCtDef &scan_ctdef, const ObRawExpr *trans_info_expr, const bool include_agg) @@ -281,15 +305,33 @@ int ObTscCgService::generate_das_result_output(const ObIArray &output_ int64_t access_expr_cnt = 
access_exprs.count(); int64_t agg_expr_cnt = include_agg ? agg_exprs.count() : 0; int64_t trans_expr_cnt = trans_info_expr == nullptr ? 0 : 1; - if (OB_UNLIKELY(access_column_cnt != access_expr_cnt)) { + uint64_t vec_vid_col_id = OB_INVALID_ID; + if (OB_UNLIKELY(access_column_cnt != access_expr_cnt || vec_vid_expr.count() > 1)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("access column count is invalid", K(ret), K(access_column_cnt), K(access_expr_cnt)); - } else if (OB_FAIL(scan_ctdef.result_output_.init(output_cids.count() + agg_expr_cnt + trans_expr_cnt))) { + LOG_WARN("access column count is invalid", K(ret), K(access_column_cnt), K(access_expr_cnt), K(vec_vid_expr)); + } else if (OB_FAIL(scan_ctdef.result_output_.init(output_cids.count() + vec_vid_expr.count() + agg_expr_cnt + trans_expr_cnt))) { LOG_WARN("init result output failed", K(ret)); + } else if (vec_vid_expr.count() > 0) { + const ObTableSchema *table_schema = nullptr; + if (OB_FAIL(cg_.opt_ctx_->get_schema_guard()->get_table_schema(MTL_ID(), scan_ctdef.ref_table_id_, table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(scan_ctdef.ref_table_id_)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to table schema", K(ret)); + } else if (OB_FAIL(table_schema->get_vec_index_vid_col_id(vec_vid_col_id))) { + LOG_WARN("fail to get vec index vid column ids", K(ret)); + } else if (OB_INVALID_ID == vec_vid_col_id) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get vec vid column", K(ret)); + } } for (int64_t i = 0; OB_SUCC(ret) && i < output_cids.count(); ++i) { int64_t idx = OB_INVALID_INDEX; - if (!has_exist_in_array(scan_ctdef.access_column_ids_, output_cids.at(i), &idx)) { + if (vec_vid_expr.count() > 0 && vec_vid_col_id == output_cids.at(i)) { + if (OB_FAIL(scan_ctdef.result_output_.push_back(vec_vid_expr.at(0)))) { + LOG_WARN("fail to push back doc id", K(ret), K(vec_vid_col_id)); + } + } else if (!has_exist_in_array(scan_ctdef.access_column_ids_, 
output_cids.at(i), &idx)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("output column does not exist in access column ids", K(ret), K(scan_ctdef.access_column_ids_), K(output_cids.at(i)), K(output_cids), @@ -585,6 +627,16 @@ int ObTscCgService::extract_das_access_exprs(const ObLogTableScan &op, if (OB_FAIL(extract_text_ir_access_columns(op, scan_ctdef, access_exprs))) { LOG_WARN("failed to extract text ir access columns", K(ret)); } + } else if (op.is_vec_idx_scan() && + (scan_table_id != op.get_ref_table_id() || scan_ctdef.ir_scan_type_ == OB_VEC_COM_AUX_SCAN) && + scan_table_id != op.get_rowkey_vid_table_id()) { + if (OB_FAIL(extract_vec_ir_access_columns(op, scan_ctdef, access_exprs))) { + LOG_WARN("failed to extract vector access columns", K(ret)); + } + } else if (op.is_tsc_with_vid() && scan_table_id == op.get_rowkey_vid_table_id()) { + if (OB_FAIL(extract_rowkey_vid_access_columns(op, scan_ctdef, access_exprs))) { + LOG_WARN("fail to extract rowkey doc access columns", K(ret)); + } } else if (op.get_index_back() && scan_table_id == op.get_real_index_table_id()) { //this das scan is index scan and will lookup the data table later //index scan + lookup data table: the index scan only need access @@ -658,7 +710,8 @@ int ObTscCgService::extract_das_access_exprs(const ObLogTableScan &op, ObRawExpr *expr = access_exprs.at(i); if (expr->is_column_ref_expr() && static_cast(expr)->is_virtual_generated_column() && - !static_cast(expr)->is_xml_column()) { + !static_cast(expr)->is_xml_column() && + !static_cast(expr)->is_vec_vid_column()) { // do nothing. 
} else { if (OB_FAIL(add_var_to_array_no_dup(tmp_access_exprs, expr))) { @@ -734,6 +787,7 @@ int ObTscCgService::generate_geo_access_ctdef(const ObLogTableScan &op, const Ob int ObTscCgService::generate_access_ctdef(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef, + common::ObIArray &vec_vid_expr, bool &has_rowscn) { int ret = OB_SUCCESS; @@ -742,6 +796,9 @@ int ObTscCgService::generate_access_ctdef(const ObLogTableScan &op, ObTableID table_id = scan_ctdef.ref_table_id_; ObArray access_column_ids; ObArray access_exprs; + ObArray scan_param_access_exprs; + ObArray vec_vid_access_expr; + if (OB_FAIL(cg_.opt_ctx_->get_schema_guard()->get_table_schema(MTL_ID(), table_id, table_schema))) { LOG_WARN("get table schema failed", K(ret), K(table_id)); } else if (OB_ISNULL(table_schema)) { @@ -759,16 +816,15 @@ int ObTscCgService::generate_access_ctdef(const ObLogTableScan &op, ARRAY_FOREACH(access_exprs, i) { ObRawExpr *expr = access_exprs.at(i); + bool is_vec_vid_access_expr = false; if (OB_UNLIKELY(OB_ISNULL(expr))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("expr is null", K(ret)); } else if (T_ORA_ROWSCN == expr->get_expr_type()) { //only data table need to produce rowscn - if (table_schema->is_index_table()) { - if (op.is_index_scan() && !op.get_index_back()) { + if (table_schema->is_index_table() && op.is_index_scan() && !op.get_index_back()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("rowscn only can be produced by data table", K(ret)); - } } else if (OB_FAIL(access_column_ids.push_back(OB_HIDDEN_TRANS_VERSION_COLUMN_ID))) { LOG_WARN("store output column ids failed", K(ret)); } else { @@ -785,19 +841,32 @@ int ObTscCgService::generate_access_ctdef(const ObLogTableScan &op, ObColumnRefRawExpr* col_expr = static_cast(expr); bool is_mapping_vt_table = op.get_real_ref_table_id() != op.get_ref_table_id(); ObTableID real_table_id = is_mapping_vt_table ? 
op.get_real_ref_table_id() : op.get_table_id(); - if (!col_expr->has_flag(IS_COLUMN) || (col_expr->get_table_id() != real_table_id && !col_expr->is_doc_id_column())) { + real_table_id = op.is_tsc_with_vid() && table_schema->is_vec_rowkey_vid_type() ? table_id : real_table_id; + if (!col_expr->has_flag(IS_COLUMN) || (col_expr->get_table_id() != real_table_id && !(col_expr->is_doc_id_column() || col_expr->is_vec_vid_column()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Expected basic column", K(ret), K(*col_expr), K(col_expr->has_flag(IS_COLUMN)), K(col_expr->get_table_id()), K(real_table_id), K(op.get_real_ref_table_id()), K(op.get_ref_table_id()), K(op.get_table_id()), K(op.get_real_index_table_id())); + } else if (op.is_tsc_with_vid() && table_schema->is_user_table() && col_expr->is_vec_vid_column()) { + // skip vec vid column in data table + is_vec_vid_access_expr = true; } else if (OB_FAIL(access_column_ids.push_back(col_expr->get_column_id()))) { LOG_WARN("store column id failed", K(ret)); } } + if (OB_SUCC(ret)) { + if (is_vec_vid_access_expr && OB_FAIL(vec_vid_access_expr.push_back(expr))) { + LOG_WARN("fail to push back vec vid access expr", K(ret)); + } else if (!is_vec_vid_access_expr && OB_FAIL(scan_param_access_exprs.push_back(expr))) { + LOG_WARN("fail to push back scan param access expr", K(ret)); + } + } } // end for if (OB_SUCC(ret)) { - if (OB_FAIL(cg_.generate_rt_exprs(access_exprs, scan_ctdef.pd_expr_spec_.access_exprs_))) { - LOG_WARN("generate rt exprs failed", K(ret), K(access_exprs)); + if (OB_FAIL(cg_.generate_rt_exprs(scan_param_access_exprs, scan_ctdef.pd_expr_spec_.access_exprs_))) { + LOG_WARN("generate rt exprs failed", K(ret), K(scan_param_access_exprs)); + } else if (OB_FAIL(cg_.generate_rt_exprs(vec_vid_access_expr, vec_vid_expr))) { + LOG_WARN("fail to generate no access rt exprs", K(ret)); } else if (OB_FAIL(cg_.mark_expr_self_produced(access_exprs))) { LOG_WARN("makr expr self produced failed", K(ret), K(access_exprs)); } else if 
(OB_FAIL(scan_ctdef.access_column_ids_.assign(access_column_ids))) { @@ -901,8 +970,9 @@ int ObTscCgService::generate_das_scan_ctdef(const ObLogTableScan &op, bool &has_rowscn) { int ret = OB_SUCCESS; + ObSEArray vec_vid_expr; // 1. add basic column - if (OB_FAIL(generate_access_ctdef(op, scan_ctdef, has_rowscn))) { + if (OB_FAIL(generate_access_ctdef(op, scan_ctdef, vec_vid_expr, has_rowscn))) { LOG_WARN("generate access ctdef failed", K(ret), K(scan_ctdef.ref_table_id_)); } //2. generate pushdown aggr column @@ -947,7 +1017,7 @@ int ObTscCgService::generate_das_scan_ctdef(const ObLogTableScan &op, //7. generate das result output if (OB_SUCC(ret)) { const bool pd_agg = scan_ctdef.pd_expr_spec_.pd_storage_flag_.is_aggregate_pushdown(); - if (OB_FAIL(generate_das_result_output(tsc_out_cols, scan_ctdef, op.get_trans_info_expr(), pd_agg))) { + if (OB_FAIL(generate_das_result_output(tsc_out_cols, vec_vid_expr, scan_ctdef, op.get_trans_info_expr(), pd_agg))) { LOG_WARN("failed to init result outputs", K(ret)); } } @@ -969,6 +1039,17 @@ int ObTscCgService::extract_das_output_column_ids(const ObLogTableScan &op, if (OB_FAIL(extract_text_ir_das_output_column_ids(op, scan_ctdef, output_cids))) { LOG_WARN("failed to extract text retrieval das output column ids", K(ret)); } + } else if (op.is_vec_idx_scan() && + (table_id != op.get_ref_table_id() || scan_ctdef.ir_scan_type_ == OB_VEC_COM_AUX_SCAN) && + table_id != op.get_rowkey_vid_table_id()) { + // non main table scan in text retrieval + if (OB_FAIL(extract_vector_das_output_column_ids(op, scan_ctdef, output_cids))) { + LOG_WARN("failed to extract vector das output column ids", K(ret)); + } + } else if (op.is_tsc_with_vid() && table_id == op.get_rowkey_vid_table_id()) { + if (OB_FAIL(extract_rowkey_vid_output_columns_ids(index_schema, op, scan_ctdef, output_cids))) { + LOG_WARN("fail to extract rowkey doc output columns ids", K(ret)); + } } else if ((op.get_index_back() || op.is_multivalue_index_scan()) && 
op.get_real_index_table_id() == table_id) { //this situation is index lookup, and the index table scan is being processed //the output column id of index lookup is the rowkey of the data table @@ -1044,6 +1125,21 @@ int ObTscCgService::extract_das_output_column_ids(const ObLogTableScan &op, LOG_WARN("store group id expr failed", K(ret)); } else if (OB_FAIL(extract_das_column_ids(das_output_cols, output_cids))) { LOG_WARN("extract column ids failed", K(ret)); + } else if (op.is_tsc_with_vid() && index_schema.is_user_table()) { + uint64_t vid_id_col_id = OB_INVALID_ID; + if (OB_FAIL(index_schema.get_vec_index_vid_col_id(vid_id_col_id))) { + LOG_WARN("fail to get fulltext column ids", K(ret)); + } else if (OB_INVALID_ID == vid_id_col_id) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get doc id column", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < output_cids.count(); ++i) { + if (output_cids.at(i) == vid_id_col_id) { + scan_ctdef.vec_vid_idx_ = i; + break; + } + } + } } return ret; } @@ -1144,6 +1240,157 @@ int ObTscCgService::generate_table_loc_meta(uint64_t table_loc_id, return ret; } +int ObTscCgService::generate_vec_ir_ctdef(const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASBaseCtDef *&root_ctdef) +{ + int ret = OB_SUCCESS; + ObIAllocator &ctdef_alloc = cg_.phy_plan_->get_allocator(); + ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard(); + ObDASVecAuxScanCtDef *vec_scan_ctdef = nullptr; + ObDASSortCtDef *sort_ctdef = nullptr; + int64_t dim = 0; + const ObTableSchema *delta_buf_table_schema = NULL; + if (OB_ISNULL(schema_guard)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointer", K(ret)); + } else if (OB_UNLIKELY(OB_INVALID_ID == op.get_vector_index_info().delta_buffer_tid_ + || OB_INVALID_ID == op.get_vector_index_info().index_id_tid_ + || OB_INVALID_ID == op.get_vector_index_info().index_snapshot_data_tid_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid vector index table id", K(ret)); + } 
else if (OB_UNLIKELY(ObTSCIRScanType::OB_IR_INV_IDX_SCAN != tsc_ctdef.scan_ctdef_.ir_scan_type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected ir scan type for inverted index scan", K(ret), K(tsc_ctdef.scan_ctdef_)); + } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_VEC_SCAN, ctdef_alloc, vec_scan_ctdef))) { + LOG_WARN("allocate ir scan ctdef failed", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(op.get_vector_index_info().delta_buffer_tid_, delta_buf_table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(op.get_vector_index_info().delta_buffer_tid_)); + } else if (OB_ISNULL(delta_buf_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get table schema", K(ret)); + } else if (OB_FAIL(ObVectorIndexUtil::get_vector_index_column_dim(*delta_buf_table_schema, dim))) { + LOG_WARN("fail to get vec_index_col_param", K(ret)); + } else { + ObDASScanCtDef *inv_idx_scan_ctdef = &tsc_ctdef.scan_ctdef_; + ObDASScanCtDef *delta_ctdef = nullptr; + ObDASScanCtDef *index_id_ctdef = nullptr; + ObDASScanCtDef *snapshot_ctdef = nullptr; + ObDASScanCtDef *com_aux_ctdef = nullptr; + bool has_rowscn = false; + if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, delta_ctdef))) { + LOG_WARN("allocate delta buf table ctdef failed", K(ret)); + } else { + delta_ctdef->ref_table_id_ = op.get_vector_index_info().delta_buffer_tid_; + delta_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_VEC_DELTA_BUF_SCAN; + if (OB_FAIL(generate_das_scan_ctdef(op, *delta_ctdef, has_rowscn))) { + LOG_WARN("failed to generate das scan ctdef", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, index_id_ctdef))) { + LOG_WARN("allocate index id table ctdef failed", K(ret)); + } else { + index_id_ctdef->ref_table_id_ = op.get_vector_index_info().index_id_tid_; + index_id_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_VEC_IDX_ID_SCAN; + if (OB_FAIL(generate_das_scan_ctdef(op, 
*index_id_ctdef, has_rowscn))) { + LOG_WARN("failed to generate das scan ctdef", K(ret)); + } + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, snapshot_ctdef))) { + LOG_WARN("allocate snapshot table ctdef failed", K(ret)); + } else { + snapshot_ctdef->ref_table_id_ =op.get_vector_index_info().index_snapshot_data_tid_; + snapshot_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_VEC_SNAPSHOT_SCAN; + if (OB_FAIL(generate_das_scan_ctdef(op, *snapshot_ctdef, has_rowscn))) { + LOG_WARN("generate das scan ctdef failed", K(ret)); + } + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, com_aux_ctdef))) { + LOG_WARN("allocate snapshot table ctdef failed", K(ret)); + } else { + com_aux_ctdef->ref_table_id_ = op.get_vector_index_info().main_table_tid_; + com_aux_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_VEC_COM_AUX_SCAN; + if (OB_FAIL(generate_das_scan_ctdef(op, *com_aux_ctdef, has_rowscn))) { + LOG_WARN("generate das scan ctdef failed", K(ret)); + } + } + } + + if (OB_SUCC(ret)) { + int64_t vec_child_task_cnt = 5; + if (OB_ISNULL(vec_scan_ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &ctdef_alloc, vec_child_task_cnt))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate ir scan ctdef children failed", K(ret)); + } else if (OB_FAIL(ob_write_string(ctdef_alloc, delta_buf_table_schema->get_index_params(), vec_scan_ctdef->vec_index_param_))) { + LOG_WARN("fail to get index param", K(ret)); + } else { + vec_scan_ctdef->children_cnt_ = vec_child_task_cnt; // number of ObDASScanCtDef + vec_scan_ctdef->children_[0] = inv_idx_scan_ctdef; + vec_scan_ctdef->children_[1] = delta_ctdef; + vec_scan_ctdef->children_[2] = index_id_ctdef; + vec_scan_ctdef->children_[3] = snapshot_ctdef; + vec_scan_ctdef->children_[4] = com_aux_ctdef; + vec_scan_ctdef->dim_ = dim; + } + } + + if (OB_SUCC(ret)) { + ObRawExpr *expr = op.get_vector_index_info().sort_key_.expr_; + if 
(expr->is_vector_sort_expr()) { + for (int64_t i = 0; OB_SUCC(ret) && i < expr->get_param_count(); ++i) { + if (expr->get_param_expr(i)->is_column_ref_expr() && OB_FAIL(cg_.mark_expr_self_produced(expr->get_param_expr(i)))) { + LOG_WARN("mark expr self produced failed", K(ret), KPC(expr->get_param_expr(i))); + } + } + } + } + } + + if (OB_SUCC(ret)) { + root_ctdef = vec_scan_ctdef; + if (OB_FAIL(generate_vec_ir_spec_exprs(op, *vec_scan_ctdef))) { + LOG_WARN("failed to generate vec ir spec exprs", K(ret)); + } + } + + if (OB_SUCC(ret) && op.get_vector_index_info().need_sort()) { + ObSEArray order_items; + if (OB_FAIL(order_items.push_back(op.get_vector_index_info().sort_key_))) { + LOG_WARN("append order item array failed", K(ret)); + } else if (OB_FAIL(generate_das_sort_ctdef( + order_items, + false, + op.get_vector_index_info().topk_limit_expr_, + op.get_vector_index_info().topk_offset_expr_, + vec_scan_ctdef, + sort_ctdef))) { + LOG_WARN("generate sort ctdef failed", K(ret)); + } else { + root_ctdef = sort_ctdef; + } + } + + if (OB_SUCC(ret) && op.get_index_back()) { + ObDASIRAuxLookupCtDef *aux_lookup_ctdef = nullptr; + ObDASBaseCtDef *vir_output_ctdef = nullptr == sort_ctdef ? 
+ static_cast(vec_scan_ctdef) : static_cast(sort_ctdef); + if (OB_FAIL(generate_vec_id_lookup_ctdef(op, tsc_ctdef, vir_output_ctdef, aux_lookup_ctdef))) { + LOG_WARN("generate vid lookup ctdef failed", K(ret)); + } else { + root_ctdef = aux_lookup_ctdef; + } + } + return ret; +} + int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef) @@ -1327,6 +1574,70 @@ int ObTscCgService::append_fts_relavence_project_col( return ret; } +int ObTscCgService::extract_vec_ir_access_columns( + const ObLogTableScan &op, + const ObDASScanCtDef &scan_ctdef, + ObIArray &access_exprs) +{ + int ret = OB_SUCCESS; + const ObVectorIndexInfo &vec_info = op.get_vector_index_info(); + if (scan_ctdef.ref_table_id_ == op.get_doc_id_index_table_id()) { + if (OB_FAIL(extract_vec_id_index_back_access_columns(op, access_exprs))) { + LOG_WARN("failed to extract vid index back access columns", K(ret)); + } + } else { + switch (scan_ctdef.ir_scan_type_) { + case ObTSCIRScanType::OB_IR_INV_IDX_SCAN: { + if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(vec_info.vec_id_column_)))) { + LOG_WARN("failed to push document id column to access exprs", K(ret)); + } + break; + } + case ObTSCIRScanType::OB_VEC_DELTA_BUF_SCAN: { + if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(vec_info.delta_vid_column_)))) { + LOG_WARN("failed to add document length column to access exprs", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(vec_info.delta_type_column_)))) { + LOG_WARN("failed to add document length column to access exprs", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(vec_info.delta_vector_column_)))) { + LOG_WARN("failed to add document length column to access exprs", K(ret)); + } + break; + } + case ObTSCIRScanType::OB_VEC_IDX_ID_SCAN: { + if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(vec_info.index_id_scn_column_)))) { + 
LOG_WARN("failed to add document length column to access exprs", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(vec_info.index_id_vid_column_)))) { + LOG_WARN("failed to add document length column to access exprs", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(vec_info.index_id_type_column_)))) { + LOG_WARN("failed to add document length column to access exprs", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(vec_info.index_id_vector_column_)))) { + LOG_WARN("failed to add document length column to access exprs", K(ret)); + } + break; + } + case ObTSCIRScanType::OB_VEC_SNAPSHOT_SCAN: { + if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(vec_info.snapshot_key_column_)))) { + LOG_WARN("failed to add document length column to access exprs", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(vec_info.snapshot_data_column_)))) { + LOG_WARN("failed to add document length column to access exprs", K(ret)); + } + break; + } + case ObTSCIRScanType::OB_VEC_COM_AUX_SCAN: { + if (OB_FAIL(add_var_to_array_no_dup(access_exprs, static_cast(vec_info.target_vec_column_)))) { + LOG_WARN("failed to push document id column to access exprs", K(ret)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected text ir scan type", K(ret), K(scan_ctdef)); + } + } + } + return ret; +} + int ObTscCgService::extract_text_ir_access_columns( const ObLogTableScan &op, const ObDASScanCtDef &scan_ctdef, @@ -1372,6 +1683,31 @@ int ObTscCgService::extract_text_ir_access_columns( return ret; } +int ObTscCgService::extract_vector_das_output_column_ids( + const ObLogTableScan &op, + const ObDASScanCtDef &scan_ctdef, + ObIArray &output_cids) +{ + int ret = OB_SUCCESS; + const ObVectorIndexInfo &vec_info = op.get_vector_index_info(); + if (scan_ctdef.ref_table_id_ == op.get_doc_id_index_table_id()) { + if 
(OB_FAIL(extract_doc_id_index_back_output_column_ids(op, output_cids))) { + LOG_WARN("failed to get vid index back cids", K(ret), K(scan_ctdef.ref_table_id_)); + } + } else if (ObTSCIRScanType::OB_IR_INV_IDX_SCAN == scan_ctdef.ir_scan_type_) { + if (OB_FAIL(output_cids.push_back( + static_cast(vec_info.vec_id_column_)->get_column_id()))) { + LOG_WARN("failed to push output vid col id", K(ret)); + } + } else if (scan_ctdef.ir_scan_type_ == ObTSCIRScanType::OB_VEC_COM_AUX_SCAN) { + if (OB_FAIL(output_cids.push_back( + static_cast(vec_info.target_vec_column_)->get_column_id()))) { + LOG_WARN("failed to push output vid col id", K(ret)); + } + } + return ret; +} + int ObTscCgService::extract_text_ir_das_output_column_ids( const ObLogTableScan &op, const ObDASScanCtDef &scan_ctdef, @@ -1475,6 +1811,45 @@ int ObTscCgService::generate_text_ir_pushdown_expr_ctdef( return ret; } +int ObTscCgService::generate_vec_ir_spec_exprs(const ObLogTableScan &op, + ObDASVecAuxScanCtDef &vec_ir_scan_ctdef) +{ + int ret = OB_SUCCESS; + const ObVectorIndexInfo &vec_info = op.get_vector_index_info(); + ObSEArray result_output; + if (OB_ISNULL(vec_info.vec_id_column_) || OB_ISNULL(vec_info.delta_type_column_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + const UIntFixedArray &scan_col_id = vec_ir_scan_ctdef.get_inv_idx_scan_ctdef()->access_column_ids_; + const ObColumnRefRawExpr *vec_id_column = static_cast(vec_info.vec_id_column_); + + int64_t vec_id_col_idx = -1; + for (int64_t i = 0; i < scan_col_id.count() && vec_id_col_idx == -1; ++i) { + uint64_t cur_col_id = scan_col_id.at(i); + if (cur_col_id == vec_id_column->get_column_id()) { + vec_id_col_idx = i; + } + } + if (OB_UNLIKELY(-1 == vec_id_col_idx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected vid col idx not found in inverted index scan access columns", + K(ret), K(vec_id_column->get_column_id()), K(vec_ir_scan_ctdef), K(vec_id_col_idx)); + } else { + vec_ir_scan_ctdef.inv_scan_vec_id_col_ = 
+ vec_ir_scan_ctdef.get_inv_idx_scan_ctdef()->pd_expr_spec_.access_exprs_.at(vec_id_col_idx); + if (OB_FAIL(result_output.push_back(vec_ir_scan_ctdef.inv_scan_vec_id_col_))) { + LOG_WARN("failed to append output exprs", K(ret)); + } + } + } + + if (FAILEDx(vec_ir_scan_ctdef.result_output_.assign(result_output))) { + LOG_WARN("failed to assign result output", K(ret), K(result_output)); + } + return ret; +} + int ObTscCgService::generate_text_ir_spec_exprs(const ObLogTableScan &op, ObDASIRScanCtDef &text_ir_scan_ctdef) { @@ -1551,6 +1926,79 @@ int ObTscCgService::generate_text_ir_spec_exprs(const ObLogTableScan &op, return ret; } +int ObTscCgService::generate_vec_id_lookup_ctdef(const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASBaseCtDef *vec_scan_ctdef, + ObDASIRAuxLookupCtDef *&aux_lookup_ctdef) +{ + int ret = OB_SUCCESS; + const ObTableSchema *data_schema = nullptr; + const ObTableSchema *index_schema = nullptr; + ObDASScanCtDef *scan_ctdef = nullptr; + ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard(); + uint64_t vec_id_index_tid = OB_INVALID_ID; + + aux_lookup_ctdef = nullptr; + if (OB_ISNULL(schema_guard)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to schema guard", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(op.get_ref_table_id(), data_schema))) { + LOG_WARN("get table schema failed", K(ret), K(op.get_ref_table_id())); + } else if (OB_ISNULL(data_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get data table schema", K(ret)); + } else if (OB_FAIL(data_schema->get_vec_id_rowkey_tid(vec_id_index_tid))) { + LOG_WARN("failed to get vid rowkey index tid", K(ret), KPC(data_schema)); + } else if (OB_FAIL(schema_guard->get_table_schema(op.get_ref_table_id(), + vec_id_index_tid, + op.get_stmt(), + index_schema))) { + LOG_WARN("get table schema failed", K(ret), K(vec_id_index_tid)); + } else if (OB_ISNULL(index_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get doc_id index 
schema", K(ret), K(vec_id_index_tid)); + } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, cg_.phy_plan_->get_allocator(), scan_ctdef))) { + LOG_WARN("alloc das ctdef failed", K(ret)); + } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_IR_AUX_LOOKUP, cg_.phy_plan_->get_allocator(), aux_lookup_ctdef))) { + LOG_WARN("alloc aux lookup ctdef failed", K(ret)); + } else if (OB_ISNULL(aux_lookup_ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &cg_.phy_plan_->get_allocator(), 2))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret)); + } else { + bool has_rowscn = false; + ObArray result_outputs; + scan_ctdef->ref_table_id_ = vec_id_index_tid; + aux_lookup_ctdef->children_cnt_ = 2; + ObDASTableLocMeta *scan_loc_meta = OB_NEWx(ObDASTableLocMeta, &cg_.phy_plan_->get_allocator(), cg_.phy_plan_->get_allocator()); + if (OB_ISNULL(scan_loc_meta)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate scan location meta failed", K(ret)); + } else if (OB_FAIL(generate_das_scan_ctdef(op, *scan_ctdef, has_rowscn))) { + LOG_WARN("generate das lookup scan ctdef failed", K(ret)); + } else if (OB_FAIL(result_outputs.assign(scan_ctdef->result_output_))) { + LOG_WARN("construct aux lookup ctdef failed", K(ret)); + } else if (OB_FAIL(generate_table_loc_meta(op.get_table_id(), + *op.get_stmt(), + *index_schema, + *cg_.opt_ctx_->get_session_info(), + *scan_loc_meta))) { + LOG_WARN("generate table loc meta failed", K(ret)); + } else if (OB_FAIL(tsc_ctdef.attach_spec_.attach_loc_metas_.push_back(scan_loc_meta))) { + LOG_WARN("store scan loc meta failed", K(ret)); + } else { + aux_lookup_ctdef->children_[0] = vec_scan_ctdef; + aux_lookup_ctdef->children_[1] = scan_ctdef; + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(aux_lookup_ctdef->result_output_.assign(result_outputs))) { + LOG_WARN("assign result output failed", K(ret)); + } + } + } + return ret; +} + int ObTscCgService::generate_doc_id_lookup_ctdef(const ObLogTableScan &op, 
ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *ir_scan_ctdef, @@ -1634,6 +2082,171 @@ int ObTscCgService::generate_doc_id_lookup_ctdef(const ObLogTableScan &op, return ret; } +int ObTscCgService::extract_rowkey_vid_access_columns( + const ObLogTableScan &op, + const ObDASScanCtDef &scan_ctdef, + ObIArray &access_exprs) +{ + int ret = OB_SUCCESS; + bool vid_is_found = false; + const ObIArray &exprs = op.get_rowkey_vid_exprs(); + for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); ++i) { + ObRawExpr *expr = exprs.at(i); + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, expr is nullptr", K(ret), K(i), K(exprs)); + } else if (ObRawExpr::EXPR_COLUMN_REF != expr->get_expr_class()) { + // just skip, nothing to do. + } else if (!vid_is_found && static_cast(expr)->is_vec_vid_column()) { + vid_is_found = true; + if (OB_FAIL(access_exprs.push_back(expr))) { + LOG_WARN("fail to add doc id access expr", K(ret), KPC(expr)); + } + } else if (static_cast(expr)->is_rowkey_column()) { + if (OB_FAIL(access_exprs.push_back(expr))) { + LOG_WARN("fail to add doc id access expr", K(ret), KPC(expr)); + } + } + } + if (OB_FAIL(ret)) { + } else if (OB_UNLIKELY(!vid_is_found)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, vid raw expr isn't found", K(ret), K(exprs), K(scan_ctdef)); + } + return ret; +} + +int ObTscCgService::extract_rowkey_vid_output_columns_ids( + const share::schema::ObTableSchema &schema, + const ObLogTableScan &op, + const ObDASScanCtDef &scan_ctdef, + ObIArray &output_cids) +{ + int ret = OB_SUCCESS; + bool vid_is_found = false; + const ObIArray &exprs = op.get_rowkey_vid_exprs(); + ObArray access_exprs; + for (int64_t i = 0; OB_SUCC(ret) && !vid_is_found && i < exprs.count(); ++i) { + ObRawExpr *expr = exprs.at(i); + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, expr is nullptr", K(ret), K(i), K(exprs)); + } else if (ObRawExpr::EXPR_COLUMN_REF != expr->get_expr_class()) { + // just skip, 
nothing to do. + } else if (static_cast(expr)->is_vec_vid_column()) { + vid_is_found = true; + if (OB_FAIL(access_exprs.push_back(expr))) { + LOG_WARN("fail to add doc id access expr", K(ret), KPC(expr)); + } + } else if (static_cast(expr)->is_rowkey_column()) { + if (OB_FAIL(access_exprs.push_back(expr))) { + LOG_WARN("fail to add doc id access expr", K(ret), KPC(expr)); + } + } + } + if (OB_FAIL(ret)) { + } else if (OB_UNLIKELY(!vid_is_found)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, vid id raw expr isn't found", K(ret), K(exprs), K(scan_ctdef)); + } else if (OB_FAIL(extract_das_column_ids(access_exprs, output_cids))) { + LOG_WARN("extract column ids failed", K(ret)); + } + return ret; +} + +int ObTscCgService::generate_rowkey_vid_ctdef( + const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASScanCtDef *&rowkey_vid_scan_ctdef) +{ + int ret = OB_SUCCESS; + const ObTableSchema *data_schema = nullptr; + const ObTableSchema *rowkey_vid_schema = nullptr; + ObDASScanCtDef *scan_ctdef = nullptr; + ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard(); + uint64_t rowkey_vid_tid = OB_INVALID_ID; + + if (OB_ISNULL(schema_guard)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, schema guard is nullptr", K(ret), KP(cg_.opt_ctx_)); + } else if (OB_FAIL(schema_guard->get_table_schema(op.get_ref_table_id(), data_schema))) { + LOG_WARN("get table schema failed", K(ret), K(op.get_ref_table_id())); + } else if (OB_ISNULL(data_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get data table schema", K(ret)); + } else if (OB_FAIL(data_schema->get_rowkey_vid_tid(rowkey_vid_tid))) { + LOG_WARN("failed to get rowkey doc tid", K(ret), KPC(data_schema)); + } else if (OB_FAIL(schema_guard->get_table_schema(op.get_ref_table_id(), + rowkey_vid_tid, + op.get_stmt(), + rowkey_vid_schema))) { + LOG_WARN("get table schema failed", K(ret), K(rowkey_vid_tid)); + } else if (OB_ISNULL(rowkey_vid_schema)) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("failed to get rowkey doc schema", K(ret), K(rowkey_vid_tid)); + } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, cg_.phy_plan_->get_allocator(), scan_ctdef))) { + LOG_WARN("alloc das ctdef failed", K(ret)); + } else { + bool has_rowscn = false; + scan_ctdef->ref_table_id_ = rowkey_vid_tid; + ObDASTableLocMeta *scan_loc_meta = + OB_NEWx(ObDASTableLocMeta, &cg_.phy_plan_->get_allocator(), cg_.phy_plan_->get_allocator()); + if (OB_ISNULL(scan_loc_meta)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate scan location meta failed", K(ret)); + } else if (OB_FAIL(generate_das_scan_ctdef(op, *scan_ctdef, has_rowscn))) { + LOG_WARN("generate das lookup scan ctdef failed", K(ret)); + } else if (OB_FAIL(generate_table_loc_meta(op.get_table_id(), + *op.get_stmt(), + *rowkey_vid_schema, + *cg_.opt_ctx_->get_session_info(), + *scan_loc_meta))) { + LOG_WARN("generate table loc meta failed", K(ret)); + } else if (OB_FAIL(tsc_ctdef.attach_spec_.attach_loc_metas_.push_back(scan_loc_meta))) { + LOG_WARN("store scan loc meta failed", K(ret)); + } else { + rowkey_vid_scan_ctdef = scan_ctdef; + } + } + return ret; +} + +int ObTscCgService::generate_das_scan_ctdef_with_vid( + const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASScanCtDef *scan_ctdef, + ObDASVIdMergeCtDef *&vid_merge_ctdef) +{ + int ret = OB_SUCCESS; + ObArray result_outputs; + ObDASScanCtDef *rowkey_vid_scan_ctdef = nullptr; + if (OB_ISNULL(scan_ctdef)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(scan_ctdef)); + } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_VID_MERGE, cg_.phy_plan_->get_allocator(), + vid_merge_ctdef))) { + LOG_WARN("fail to allocate to doc id merge ctdef", K(ret)); + } else if (OB_ISNULL(vid_merge_ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &cg_.phy_plan_->get_allocator(), 2))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate doc id merge ctdef child array memory", K(ret)); + } else if 
(OB_FAIL(generate_rowkey_vid_ctdef(op, tsc_ctdef, rowkey_vid_scan_ctdef))) { + LOG_WARN("fail to generate rowkey doc ctdef", K(ret)); + } else if (OB_FAIL(result_outputs.assign(scan_ctdef->result_output_))) { + LOG_WARN("construct aux lookup ctdef failed", K(ret)); + } else if (OB_UNLIKELY(result_outputs.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, result outputs is nullptr", K(ret)); + } else { + vid_merge_ctdef->children_cnt_ = 2; + vid_merge_ctdef->children_[0] = scan_ctdef; + vid_merge_ctdef->children_[1] = rowkey_vid_scan_ctdef; + if (OB_FAIL(vid_merge_ctdef->result_output_.assign(result_outputs))) { + LOG_WARN("fail to assign result output", K(ret)); + } + } + return ret; +} + int ObTscCgService::generate_table_lookup_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *scan_ctdef, @@ -1765,6 +2378,31 @@ int ObTscCgService::extract_doc_id_index_back_access_columns( return ret; } +int ObTscCgService::extract_vec_id_index_back_access_columns( + const ObLogTableScan &op, + ObIArray &access_exprs) +{ + int ret = OB_SUCCESS; + ObArray domain_col_exprs; + if (OB_UNLIKELY(0 == op.get_domain_exprs().count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected empty domain expr array", K(ret)); + } else if (OB_FAIL(ObRawExprUtils::extract_column_exprs(op.get_domain_exprs(), domain_col_exprs, true))) { + LOG_WARN("failed to extract domain column ref exprs", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < domain_col_exprs.count(); ++i) { + ObRawExpr *raw_expr = domain_col_exprs.at(i); + ObColumnRefRawExpr *col_expr = static_cast(raw_expr); + if (col_expr->is_vec_vid_column() + || (col_expr->get_table_id() == op.get_table_id() && col_expr->is_rowkey_column())) { + if (OB_FAIL(add_var_to_array_no_dup(access_exprs, raw_expr))) { + LOG_WARN("failed to push vid index back access column to access exprs", K(ret)); + } + } + } + return ret; +} + int ObTscCgService::extract_doc_id_index_back_output_column_ids( const 
ObLogTableScan &op, ObIArray &output_cids) diff --git a/src/sql/code_generator/ob_tsc_cg_service.h b/src/sql/code_generator/ob_tsc_cg_service.h index ab2b465e33..07eb9a749e 100644 --- a/src/sql/code_generator/ob_tsc_cg_service.h +++ b/src/sql/code_generator/ob_tsc_cg_service.h @@ -24,6 +24,7 @@ struct ObTableScanCtDef; struct ObDASScanCtDef; struct AgentVtAccessMeta; //help to cg the tsc ctdef +struct ObDASVIdMergeCtDef; class ObTscCgService { public: @@ -46,11 +47,15 @@ public: const ObSQLSessionInfo &session, ObDASTableLocMeta &loc_meta); int generate_das_result_output(const common::ObIArray &output_cids, + common::ObIArray &vec_vid_expr, ObDASScanCtDef &scan_ctdef, const ObRawExpr *trans_info_expr, const bool include_agg = false); private: - int generate_access_ctdef(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef, bool &has_rowscn); + int generate_access_ctdef(const ObLogTableScan &op, + ObDASScanCtDef &scan_ctdef, + common::ObIArray &vec_vid_expr, + bool &has_rowscn); int generate_pushdown_aggr_ctdef(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef); int generate_das_scan_ctdef(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef, bool &has_rowscn); int generate_table_param(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef, common::ObIArray &tsc_out_cols); @@ -67,26 +72,55 @@ private: int extract_das_column_ids(const common::ObIArray &column_exprs, common::ObIArray &column_ids); int generate_geo_access_ctdef(const ObLogTableScan &op, const ObTableSchema &index_schema, ObArray &access_exprs); int generate_text_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef); + int generate_vec_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef); int extract_text_ir_access_columns(const ObLogTableScan &op, const ObDASScanCtDef &scan_ctdef, ObIArray &access_exprs); + int extract_vec_ir_access_columns(const ObLogTableScan &op, + const ObDASScanCtDef &scan_ctdef, + ObIArray 
&access_exprs); int extract_text_ir_das_output_column_ids(const ObLogTableScan &op, const ObDASScanCtDef &scan_ctdef, ObIArray &output_cids); + int extract_vector_das_output_column_ids(const ObLogTableScan &op, + const ObDASScanCtDef &scan_ctdef, + ObIArray &output_cids); + int extract_rowkey_vid_access_columns(const ObLogTableScan &op, + const ObDASScanCtDef &scan_ctdef, + ObIArray &access_exprs); + int extract_rowkey_vid_output_columns_ids(const share::schema::ObTableSchema &schema, + const ObLogTableScan &op, + const ObDASScanCtDef &scan_ctdef, + ObIArray &output_cids); int generate_text_ir_pushdown_expr_ctdef(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef); int generate_text_ir_spec_exprs(const ObLogTableScan &op, ObDASIRScanCtDef &text_ir_scan_ctdef); + int generate_vec_ir_spec_exprs(const ObLogTableScan &op, + ObDASVecAuxScanCtDef &vec_ir_scan_ctdef); int generate_doc_id_lookup_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *ir_scan_ctdef, ObExpr *doc_id_expr, ObDASIRAuxLookupCtDef *&aux_lookup_ctdef); + int generate_vec_id_lookup_ctdef(const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASBaseCtDef *vec_scan_ctdef, + ObDASIRAuxLookupCtDef *&aux_lookup_ctdef); + int generate_rowkey_vid_ctdef(const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASScanCtDef *&rowkey_doc_scan_ctdef); + int generate_das_scan_ctdef_with_vid(const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASScanCtDef *scan_ctdef, + ObDASVIdMergeCtDef *&vid_merge_ctdef); int generate_table_lookup_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *scan_ctdef, ObDASTableLookupCtDef *&lookup_ctdef); int extract_doc_id_index_back_access_columns(const ObLogTableScan &op, ObIArray &access_exprs); + int extract_vec_id_index_back_access_columns(const ObLogTableScan &op, + ObIArray &access_exprs); int extract_doc_id_index_back_output_column_ids(const ObLogTableScan &op, ObIArray &output_cids); int 
filter_out_match_exprs(ObIArray &exprs); diff --git a/src/sql/das/iter/ob_das_iter.cpp b/src/sql/das/iter/ob_das_iter.cpp index 4f7e7fee18..ed1ebc9a64 100644 --- a/src/sql/das/iter/ob_das_iter.cpp +++ b/src/sql/das/iter/ob_das_iter.cpp @@ -13,6 +13,7 @@ #define USING_LOG_PREFIX SQL_DAS #include "sql/das/iter/ob_das_iter.h" +#include "sql/das/iter/ob_das_vid_merge_iter.h" namespace oceanbase @@ -129,5 +130,28 @@ int ObDASIter::get_next_rows(int64_t &count, int64_t capacity) return ret; } +int ObDASIter::get_vid_merge_iter(ObDASVIdMergeIter *&vid_merge_iter) +{ + int ret = OB_SUCCESS; + vid_merge_iter = nullptr; + if (OB_UNLIKELY(!inited_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("das iter get next rows before init", K(ret)); + } else if (ObDASIterType::DAS_ITER_VEC_VID_MERGE == type_) { + vid_merge_iter = static_cast(this); + } else { + for (int64_t i = 0; OB_SUCC(ret) && nullptr == vid_merge_iter && i < children_cnt_; ++i) { + ObDASIter *iter = children_[i]; + if (OB_ISNULL(iter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("das iter is nullptr", K(ret), KPC(iter)); + } else if (OB_FAIL(iter->get_vid_merge_iter(vid_merge_iter))) { + LOG_WARN("fail to get vid merge iter", K(ret), KPC(iter)); + } + } + } + return ret; +} + } // namespace sql } // namespace oceanbase diff --git a/src/sql/das/iter/ob_das_iter.h b/src/sql/das/iter/ob_das_iter.h index ffb88c15df..9c5a771305 100644 --- a/src/sql/das/iter/ob_das_iter.h +++ b/src/sql/das/iter/ob_das_iter.h @@ -23,6 +23,7 @@ using namespace common; namespace sql { +class ObDASVIdMergeIter; class ObEvalCtx; class ObExecContext; struct ObDASIterParam @@ -116,6 +117,7 @@ public: virtual void reset() override {} // for compatibility with ObNewRowIterator + int get_vid_merge_iter(ObDASVIdMergeIter *&vid_merge_iter); protected: virtual int inner_init(ObDASIterParam ¶m) = 0; virtual int inner_reuse() = 0; diff --git a/src/sql/das/iter/ob_das_iter_define.h b/src/sql/das/iter/ob_das_iter_define.h index 4e9b851a05..a1e48161d7 100644 --- 
a/src/sql/das/iter/ob_das_iter_define.h +++ b/src/sql/das/iter/ob_das_iter_define.h @@ -31,6 +31,7 @@ enum ObDASIterType : uint32_t DAS_ITER_TEXT_RETRIEVAL, DAS_ITER_SORT, DAS_ITER_TEXT_RETRIEVAL_MERGE, + DAS_ITER_VEC_VID_MERGE, // append DASIterType before me DAS_ITER_MAX }; @@ -67,6 +68,7 @@ struct ObDASRelatedTabletID public: common::ObTabletID lookup_tablet_id_; common::ObTabletID aux_lookup_tablet_id_; + common::ObTabletID rowkey_vid_tablet_id_; /* used by fulltext index */ common::ObTabletID inv_idx_tablet_id_; @@ -77,6 +79,7 @@ public: { lookup_tablet_id_.reset(); aux_lookup_tablet_id_.reset(); + rowkey_vid_tablet_id_.reset(); inv_idx_tablet_id_.reset(); fwd_idx_tablet_id_.reset(); doc_id_idx_tablet_id_.reset(); diff --git a/src/sql/das/iter/ob_das_iter_utils.cpp b/src/sql/das/iter/ob_das_iter_utils.cpp index 979d033e6a..f3086184a7 100644 --- a/src/sql/das/iter/ob_das_iter_utils.cpp +++ b/src/sql/das/iter/ob_das_iter_utils.cpp @@ -35,7 +35,7 @@ int ObDASIterUtils::create_das_scan_iter_tree(ObDASIterTreeType tree_type, int ret = OB_SUCCESS; switch (tree_type) { case ITER_TREE_PARTITION_SCAN: { - ret = create_partition_scan_tree(scan_param, alloc, scan_ctdef, scan_rtdef, iter_tree); + ret = create_partition_scan_tree(scan_param, alloc, scan_ctdef, scan_rtdef, attach_ctdef, attach_rtdef, related_tablet_ids, trans_desc, snapshot, iter_tree); break; } case ITER_TREE_LOCAL_LOOKUP: { @@ -220,6 +220,11 @@ int ObDASIterUtils::create_partition_scan_tree(storage::ObTableScanParam &scan_p common::ObIAllocator &alloc, const ObDASScanCtDef *scan_ctdef, ObDASScanRtDef *scan_rtdef, + const ObDASBaseCtDef *attach_ctdef, + ObDASBaseRtDef *attach_rtdef, + const ObDASRelatedTabletID &related_tablet_ids, + transaction::ObTxDesc *trans_desc, + transaction::ObTxReadSnapshot *snapshot, ObDASIter *&iter_tree) { int ret = OB_SUCCESS; @@ -231,6 +236,15 @@ int ObDASIterUtils::create_partition_scan_tree(storage::ObTableScanParam &scan_p } else { scan_iter->set_scan_param(scan_param); 
iter_tree = scan_iter; + if (OB_NOT_NULL(attach_ctdef)) { + if (OB_UNLIKELY(ObDASOpType::DAS_OP_VID_MERGE != attach_ctdef->op_type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, attach op type isn't doc id merge", K(ret), K(attach_ctdef->op_type_), KPC(attach_ctdef)); + } else if (OB_FAIL(create_vid_scan_sub_tree(scan_param, alloc, static_cast(attach_ctdef), + static_cast(attach_rtdef), related_tablet_ids, trans_desc, snapshot, iter_tree))) { + LOG_WARN("fail to create vec vid scan sub tree", K(ret), K(scan_param), KPC(attach_ctdef), KPC(attach_rtdef)); + } + } } return ret; } @@ -515,6 +529,55 @@ int ObDASIterUtils::create_text_retrieval_sub_tree(const ObLSID &ls_id, return ret; } +int ObDASIterUtils::create_vid_scan_sub_tree( + ObTableScanParam &scan_param, + common::ObIAllocator &alloc, + const ObDASVIdMergeCtDef *merge_ctdef, + ObDASVIdMergeRtDef *merge_rtdef, + const ObDASRelatedTabletID &related_tablet_ids, + transaction::ObTxDesc *trans_desc, + transaction::ObTxReadSnapshot *snapshot, + ObDASIter *&iter_tree) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(merge_ctdef) || OB_UNLIKELY(2 != merge_ctdef->children_cnt_) + || OB_ISNULL(iter_tree) || OB_UNLIKELY(ObDASIterType::DAS_ITER_SCAN != iter_tree->get_type())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(merge_ctdef), KPC(iter_tree)); + } else { + ObDASVIdMergeIterParam vid_merge_param; + ObDASVIdMergeIter *vid_merge_iter = nullptr; + ObDASScanIterParam rowkey_vid_param; + ObDASScanIter *rowkey_vid_iter = nullptr; + rowkey_vid_param.scan_ctdef_ = static_cast(merge_ctdef->children_[1]); + if (OB_FAIL(create_das_iter(alloc, rowkey_vid_param, rowkey_vid_iter))) { + LOG_WARN("fail to create das scan iter", K(ret), K(rowkey_vid_param)); + } else { + vid_merge_param.rowkey_vid_tablet_id_ = related_tablet_ids.rowkey_vid_tablet_id_; + vid_merge_param.rowkey_vid_ls_id_ = scan_param.ls_id_; + vid_merge_param.data_table_ctdef_ = static_cast(merge_ctdef->children_[0]); + 
vid_merge_param.rowkey_vid_ctdef_ = static_cast(merge_ctdef->children_[1]); + vid_merge_param.data_table_rtdef_ = static_cast(merge_rtdef->children_[0]); + vid_merge_param.rowkey_vid_rtdef_ = static_cast(merge_rtdef->children_[1]); + vid_merge_param.data_table_iter_ = static_cast(iter_tree); + vid_merge_param.rowkey_vid_iter_ = rowkey_vid_iter; + vid_merge_param.trans_desc_ = trans_desc; + vid_merge_param.snapshot_ = snapshot; + if (OB_FAIL(create_das_iter(alloc, vid_merge_param, vid_merge_iter))) { + LOG_WARN("fail to create vid id merge iter", K(ret), K(vid_merge_param)); + } else if (OB_FAIL(create_iter_children_array(2, alloc, vid_merge_iter))) { + LOG_WARN("fail to create vid id merge iter children array", K(ret)); + } else { + vid_merge_iter->get_children()[0] = iter_tree; + vid_merge_iter->get_children()[1] = rowkey_vid_iter; + rowkey_vid_iter->set_scan_param(vid_merge_iter->get_rowkey_vid_scan_param()); + iter_tree = vid_merge_iter; + } + } + } + return ret; +} + int ObDASIterUtils::create_domain_lookup_sub_tree(const ObLSID &ls_id, common::ObIAllocator &alloc, const ObDASTableLookupCtDef *table_lookup_ctdef, diff --git a/src/sql/das/iter/ob_das_iter_utils.h b/src/sql/das/iter/ob_das_iter_utils.h index 84c814e650..8417282b70 100644 --- a/src/sql/das/iter/ob_das_iter_utils.h +++ b/src/sql/das/iter/ob_das_iter_utils.h @@ -22,6 +22,7 @@ #include "sql/das/iter/ob_das_sort_iter.h" #include "sql/das/iter/ob_das_text_retrieval_iter.h" #include "sql/das/iter/ob_das_text_retrieval_merge_iter.h" +#include "sql/das/iter/ob_das_vid_merge_iter.h" #include "sql/engine/table/ob_table_scan_op.h" namespace oceanbase @@ -77,6 +78,11 @@ private: common::ObIAllocator &alloc, const ObDASScanCtDef *scan_ctdef, ObDASScanRtDef *scan_rtdef, + const ObDASBaseCtDef *attach_ctdef, + ObDASBaseRtDef *attach_rtdef, + const ObDASRelatedTabletID &related_tablet_ids, + transaction::ObTxDesc *trans_desc, + transaction::ObTxReadSnapshot *snapshot, ObDASIter *&iter_tree); static int 
create_local_lookup_tree(ObTableScanParam &scan_param, @@ -105,6 +111,15 @@ private: transaction::ObTxReadSnapshot *snapshot, ObDASIter *&iter_tree); + static int create_vid_scan_sub_tree(ObTableScanParam &scan_param, + common::ObIAllocator &alloc, + const ObDASVIdMergeCtDef *merge_ctdef, + ObDASVIdMergeRtDef *merge_rtdef, + const ObDASRelatedTabletID &related_tablet_ids, + transaction::ObTxDesc *trans_desc, + transaction::ObTxReadSnapshot *snapshot, + ObDASIter *&iter_tree); + static int create_domain_lookup_sub_tree(const ObLSID &ls_id, common::ObIAllocator &alloc, const ObDASTableLookupCtDef *table_lookup_ctdef, diff --git a/src/sql/das/iter/ob_das_local_lookup_iter.cpp b/src/sql/das/iter/ob_das_local_lookup_iter.cpp index 4e56b55895..e32dd00123 100644 --- a/src/sql/das/iter/ob_das_local_lookup_iter.cpp +++ b/src/sql/das/iter/ob_das_local_lookup_iter.cpp @@ -15,6 +15,7 @@ #include "sql/das/iter/ob_das_scan_iter.h" #include "sql/das/ob_das_scan_op.h" #include "sql/das/ob_das_ir_define.h" +#include "sql/das/ob_das_vec_define.h" #include "storage/concurrency_control/ob_data_validation_service.h" namespace oceanbase diff --git a/src/sql/das/iter/ob_das_merge_iter.cpp b/src/sql/das/iter/ob_das_merge_iter.cpp index 1cb197823f..90068cfdb7 100644 --- a/src/sql/das/iter/ob_das_merge_iter.cpp +++ b/src/sql/das/iter/ob_das_merge_iter.cpp @@ -13,6 +13,8 @@ #define USING_LOG_PREFIX SQL_DAS #include "sql/das/iter/ob_das_merge_iter.h" #include "sql/das/ob_data_access_service.h" +#include "sql/engine/ob_exec_context.h" +#include "sql/das/ob_das_context.h" namespace oceanbase { diff --git a/src/sql/das/iter/ob_das_vid_merge_iter.cpp b/src/sql/das/iter/ob_das_vid_merge_iter.cpp new file mode 100644 index 0000000000..ff5a59dcd1 --- /dev/null +++ b/src/sql/das/iter/ob_das_vid_merge_iter.cpp @@ -0,0 +1,785 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX SQL_DAS
+
+#include "sql/das/iter/ob_das_vid_merge_iter.h"
+#include "sql/das/iter/ob_das_iter_define.h"
+#include "sql/das/ob_das_scan_op.h"
+#include "sql/das/ob_das_attach_define.h"
+
+using namespace oceanbase::common;
+
+namespace oceanbase
+{
+namespace sql
+{
+
+ObDASVIdMergeIterParam::ObDASVIdMergeIterParam()
+  : ObDASIterParam(DAS_ITER_VEC_VID_MERGE),
+    rowkey_vid_tablet_id_(),
+    rowkey_vid_ls_id_(),
+    rowkey_vid_iter_(nullptr),
+    data_table_iter_(nullptr),
+    rowkey_vid_ctdef_(nullptr),
+    data_table_ctdef_(nullptr),
+    rowkey_vid_rtdef_(nullptr),
+    data_table_rtdef_(nullptr),
+    trans_desc_(nullptr),
+    snapshot_(nullptr)
+{}
+
+ObDASVIdMergeIterParam::~ObDASVIdMergeIterParam()
+{
+}
+
+ObDASVIdMergeIter::ObDASVIdMergeIter()
+  : ObDASIter(),
+    need_filter_rowkey_vid_(true),
+    rowkey_vid_scan_param_(),
+    rowkey_vid_iter_(nullptr),
+    data_table_iter_(nullptr),
+    rowkey_vid_ctdef_(nullptr),
+    data_table_ctdef_(nullptr),
+    rowkey_vid_rtdef_(nullptr),
+    data_table_rtdef_(nullptr),
+    rowkey_vid_tablet_id_(),
+    rowkey_vid_ls_id_(),
+    merge_memctx_()
+{
+}
+
+ObDASVIdMergeIter::~ObDASVIdMergeIter()
+{
+}
+
+// Builds the rowkey-vid scan ranges from the data table scan param, then starts
+// the data table scan followed by the rowkey-vid scan.
+int ObDASVIdMergeIter::do_table_scan()
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(rowkey_vid_iter_) || OB_ISNULL(data_table_iter_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpeted error, rowkey vid or data table iter is nullptr", K(ret), KP(rowkey_vid_iter_),
+        KP(data_table_iter_));
+  } else {
+    if (OB_FAIL(build_rowkey_vid_range())) {
+      LOG_WARN("fail to build rowkey vid range", K(ret));
+    } else if (OB_FAIL(data_table_iter_->do_table_scan())) {
+      LOG_WARN("fail to do table scan for data table", K(ret), KPC(data_table_iter_));
+    } else if (OB_FAIL(rowkey_vid_iter_->do_table_scan())) {
+      LOG_WARN("fail to do table scan for rowkey vid", K(ret), KPC(rowkey_vid_iter_));
+    }
+    // The scan params are only printed here, where both iterators are known to be non-null.
+    LOG_INFO("do table scan", K(ret), K(data_table_iter_->get_scan_param()), K(rowkey_vid_iter_->get_scan_param()));
+  }
+  return ret;
+}
+
+// Rebuilds the rowkey-vid ranges and rescans both iterators. The rowkey-vid scan
+// param is re-pointed at the current tablet/ls before its iterator is rescanned.
+int ObDASVIdMergeIter::rescan()
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(rowkey_vid_iter_) || OB_ISNULL(data_table_iter_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpeted error, rowkey vid or data table iter is nullptr", K(ret), KP(rowkey_vid_iter_),
+        KP(data_table_iter_));
+  } else {
+    if (OB_FAIL(build_rowkey_vid_range())) {
+      LOG_WARN("fail to build rowkey vid range", K(ret));
+    } else if (OB_FAIL(data_table_iter_->rescan())) {
+      LOG_WARN("fail to rescan data table iter", K(ret), KPC(data_table_iter_));
+    } else {
+      rowkey_vid_scan_param_.tablet_id_ = rowkey_vid_tablet_id_;
+      rowkey_vid_scan_param_.ls_id_ = rowkey_vid_ls_id_;
+      if (OB_FAIL(rowkey_vid_iter_->rescan())) {
+        LOG_WARN("fail to rescan rowkey doc iter", K(ret), KPC(rowkey_vid_iter_));
+      }
+    }
+    // The scan params are only printed here, where both iterators are known to be non-null.
+    LOG_INFO("rescan", K(ret), K(data_table_iter_->get_scan_param()), K(rowkey_vid_iter_->get_scan_param()));
+  }
+  return ret;
+}
+
+// Forwards clear_evaluated_flag() to whichever child iterators are set.
+void ObDASVIdMergeIter::clear_evaluated_flag()
+{
+  if (OB_NOT_NULL(rowkey_vid_iter_)) {
+    rowkey_vid_iter_->clear_evaluated_flag();
+  }
+  if (OB_NOT_NULL(data_table_iter_)) {
+    data_table_iter_->clear_evaluated_flag();
+  }
+}
+
+// Creates the merge memory context, initializes the rowkey-vid scan param and
+// copies the child iterators / ctdefs / rtdefs out of the (downcast) param.
+int ObDASVIdMergeIter::inner_init(ObDASIterParam &param)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(ObDASIterType::DAS_ITER_VEC_VID_MERGE != param.type_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("inner init das iter with bad param type", K(ret), K(param));
+  } else {
+    ObDASVIdMergeIterParam &merge_param = static_cast<ObDASVIdMergeIterParam &>(param);
+    // Named differently from the function argument so the two are not confused.
+    lib::ContextParam context_param;
+    context_param.set_mem_attr(MTL_ID(), "DocIdMerge", ObCtxIds::DEFAULT_CTX_ID).set_properties(lib::USE_TL_PAGE_OPTIONAL);
+    if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(merge_memctx_, context_param))) {
+      LOG_WARN("failed to create merge memctx", K(ret));
+    } else if (OB_FAIL(init_rowkey_vid_scan_param(merge_param.rowkey_vid_tablet_id_, merge_param.rowkey_vid_ls_id_,
+            merge_param.rowkey_vid_ctdef_, merge_param.rowkey_vid_rtdef_, merge_param.trans_desc_,
+            merge_param.snapshot_))) {
+      LOG_WARN("fail to init rowkey vid scan param", K(ret), K(merge_param));
+    } else {
+      data_table_iter_ = merge_param.data_table_iter_;
+      rowkey_vid_iter_ = merge_param.rowkey_vid_iter_;
+      data_table_ctdef_ = merge_param.data_table_ctdef_;
+      rowkey_vid_ctdef_ = merge_param.rowkey_vid_ctdef_;
+      data_table_rtdef_ = merge_param.data_table_rtdef_;
+      rowkey_vid_rtdef_ = merge_param.rowkey_vid_rtdef_;
+      rowkey_vid_tablet_id_ = merge_param.rowkey_vid_tablet_id_;
+      rowkey_vid_ls_id_ = merge_param.rowkey_vid_ls_id_;
+      need_filter_rowkey_vid_ = true;
+    }
+  }
+  return ret;
+}
+
+// Records the rowkey-vid tablet id and ls id to be used by the next rescan().
+int ObDASVIdMergeIter::set_vid_merge_related_ids(
+    const ObDASRelatedTabletID &tablet_ids,
+    const share::ObLSID &ls_id)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(!tablet_ids.rowkey_vid_tablet_id_.is_valid() || !ls_id.is_valid())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid rowkey doc tablet id or ls id", K(ret), K(tablet_ids.rowkey_vid_tablet_id_), K(ls_id));
+  } else {
+    rowkey_vid_tablet_id_ = tablet_ids.rowkey_vid_tablet_id_;
+    rowkey_vid_ls_id_ = ls_id;
+  }
+  return ret;
+}
+
+// Reuses both child iterators and shrinks the merge memory context. The scan
+// param is flagged for a param switch when the rowkey-vid tablet id has changed.
+int ObDASVIdMergeIter::inner_reuse()
+{
+  int ret = OB_SUCCESS;
+  if (OB_NOT_NULL(data_table_iter_) && OB_FAIL(data_table_iter_->reuse())) {
+    LOG_WARN("fail to reuse data table iter", K(ret));
+  } else if (OB_NOT_NULL(rowkey_vid_iter_)) {
+    const ObTabletID old_tablet_id = rowkey_vid_scan_param_.tablet_id_;
+    const bool tablet_id_changed = old_tablet_id.is_valid() && old_tablet_id != rowkey_vid_tablet_id_;
+    rowkey_vid_scan_param_.need_switch_param_ = rowkey_vid_scan_param_.need_switch_param_ || tablet_id_changed;
+    if (OB_FAIL(rowkey_vid_iter_->reuse())) {
+      LOG_WARN("fail to reuse rowkey vid iter", K(ret));
+    }
+  }
+  if (OB_NOT_NULL(merge_memctx_)) {
+    merge_memctx_->reset_remain_one_page();
+  }
+  return ret;
+}
+
+// Destroys the merge memory context and the rowkey-vid scan param, and drops the
+// (non-owned) child iterator pointers.
+int ObDASVIdMergeIter::inner_release()
+{
+  int ret = OB_SUCCESS;
+  if (OB_NOT_NULL(merge_memctx_)) {
+    DESTROY_CONTEXT(merge_memctx_);
+    merge_memctx_ = nullptr;
+  }
+  rowkey_vid_scan_param_.destroy_schema_guard();
+  rowkey_vid_scan_param_.snapshot_.reset();
+  rowkey_vid_scan_param_.destroy();
+  data_table_iter_ = nullptr;
+  rowkey_vid_iter_ = nullptr;
+  need_filter_rowkey_vid_ = true;
+  return ret;
+}
+
+// Produces one data table row with its vid filled in. Uses the positional concat
+// path when the data table scan has no filters, otherwise the rowkey merge join.
+int ObDASVIdMergeIter::inner_get_next_row()
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(data_table_iter_) || OB_ISNULL(rowkey_vid_iter_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected error, data table or rowkey vid iter is nullptr", K(ret), KP(data_table_iter_),
+        KP(rowkey_vid_iter_));
+  } else if (!need_filter_rowkey_vid_) {
+    if (OB_FAIL(concat_row())) {
+      LOG_WARN("fail to concat data table and rowkey vid row", K(ret));
+    }
+  } else if (OB_FAIL(sorted_merge_join_row())) {
+    LOG_WARN("fail to sorted merge join data table and rowkey vid row", K(ret));
+  }
+  LOG_TRACE("inner get next row", K(ret));
+  return ret;
+}
+
+// Batch variant of inner_get_next_row(): `count` receives the number of data
+// table rows produced, at most `capacity`.
+int ObDASVIdMergeIter::inner_get_next_rows(int64_t &count, int64_t capacity)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(data_table_iter_) || OB_ISNULL(rowkey_vid_iter_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected error, data table or rowkey vid iter is nullptr", K(ret), KP(data_table_iter_),
+        KP(rowkey_vid_iter_));
+  } else if (!need_filter_rowkey_vid_) {
+    if (OB_FAIL(concat_rows(count, capacity))) {
+      LOG_WARN("fail to concat data table and rowkey vid rows", K(ret));
+    }
+  } else if (OB_FAIL(sorted_merge_join_rows(count, capacity))) {
+    LOG_WARN("fail to sorted merge join data table and rowkey vid rows", K(ret));
+  }
+  LOG_TRACE("inner get next rows", K(ret), K(count), K(capacity));
+  return ret;
+}
+
+// Fills rowkey_vid_scan_param_ from the rowkey-vid ctdef/rtdef, the transaction
+// descriptor and the read snapshot. A null snapshot is reported as OB_ERR_UNEXPECTED.
+int ObDASVIdMergeIter::init_rowkey_vid_scan_param(
+    const common::ObTabletID &tablet_id,
+    const share::ObLSID &ls_id,
+    const ObDASScanCtDef *ctdef,
+    ObDASScanRtDef *rtdef,
+    transaction::ObTxDesc *trans_desc,
+    transaction::ObTxReadSnapshot *snapshot)
+{
+  int ret = OB_SUCCESS;
+  uint64_t tenant_id = MTL_ID();
+  rowkey_vid_scan_param_.tenant_id_ = tenant_id;
+  rowkey_vid_scan_param_.key_ranges_.set_attr(ObMemAttr(tenant_id, "SParamKR"));
+  rowkey_vid_scan_param_.ss_key_ranges_.set_attr(ObMemAttr(tenant_id, "SParamSSKR"));
+  if (OB_UNLIKELY(!tablet_id.is_valid() || !ls_id.is_valid())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), K(tablet_id), K(ls_id));
+  } else if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected nullptr ctdef or rtdef", K(ret), KPC(ctdef), KPC(rtdef));
+  } else {
+    rowkey_vid_scan_param_.tablet_id_ = tablet_id;
+    rowkey_vid_scan_param_.ls_id_ = ls_id;
+    rowkey_vid_scan_param_.scan_allocator_ = &get_arena_allocator();
+    rowkey_vid_scan_param_.allocator_ = &rtdef->stmt_allocator_;
+    rowkey_vid_scan_param_.tx_lock_timeout_ = rtdef->tx_lock_timeout_;
+    rowkey_vid_scan_param_.index_id_ = ctdef->ref_table_id_;
+    rowkey_vid_scan_param_.is_get_ = ctdef->is_get_;
+    rowkey_vid_scan_param_.is_for_foreign_check_ = rtdef->is_for_foreign_check_;
+    rowkey_vid_scan_param_.timeout_ = rtdef->timeout_ts_;
+    rowkey_vid_scan_param_.scan_flag_ = rtdef->scan_flag_;
+    rowkey_vid_scan_param_.reserved_cell_count_ = ctdef->access_column_ids_.count();
+    rowkey_vid_scan_param_.sql_mode_ = rtdef->sql_mode_;
+    rowkey_vid_scan_param_.frozen_version_ = rtdef->frozen_version_;
+    rowkey_vid_scan_param_.force_refresh_lc_ = rtdef->force_refresh_lc_;
+    rowkey_vid_scan_param_.output_exprs_ = &(ctdef->pd_expr_spec_.access_exprs_);
+    rowkey_vid_scan_param_.aggregate_exprs_ = &(ctdef->pd_expr_spec_.pd_storage_aggregate_output_);
+    rowkey_vid_scan_param_.ext_file_column_exprs_ = &(ctdef->pd_expr_spec_.ext_file_column_exprs_);
+    rowkey_vid_scan_param_.ext_column_convert_exprs_ = &(ctdef->pd_expr_spec_.ext_column_convert_exprs_);
+    rowkey_vid_scan_param_.calc_exprs_ = &(ctdef->pd_expr_spec_.calc_exprs_);
+    rowkey_vid_scan_param_.table_param_ = &(ctdef->table_param_);
+    rowkey_vid_scan_param_.op_ = rtdef->p_pd_expr_op_;
+    rowkey_vid_scan_param_.row2exprs_projector_ = rtdef->p_row2exprs_projector_;
+    rowkey_vid_scan_param_.schema_version_ = ctdef->schema_version_;
+    rowkey_vid_scan_param_.tenant_schema_version_ = rtdef->tenant_schema_version_;
+    rowkey_vid_scan_param_.limit_param_ = rtdef->limit_param_;
+    rowkey_vid_scan_param_.need_scn_ = rtdef->need_scn_;
+    rowkey_vid_scan_param_.pd_storage_flag_ = ctdef->pd_expr_spec_.pd_storage_flag_.pd_flag_;
+    rowkey_vid_scan_param_.fb_snapshot_ = rtdef->fb_snapshot_;
+    rowkey_vid_scan_param_.fb_read_tx_uncommitted_ = rtdef->fb_read_tx_uncommitted_;
+    if (rtdef->is_for_foreign_check_) {
+      rowkey_vid_scan_param_.trans_desc_ = trans_desc;
+    }
+    if (OB_NOT_NULL(snapshot)) {
+      rowkey_vid_scan_param_.snapshot_ = *snapshot;
+    } else {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected null snapshot", K(ret), KPC(ctdef), KPC(rtdef));
+    }
+    if (OB_NOT_NULL(trans_desc)) {
+      rowkey_vid_scan_param_.tx_id_ = trans_desc->get_tx_id();
+    } else {
+      rowkey_vid_scan_param_.tx_id_.reset();
+    }
+    if (!ctdef->pd_expr_spec_.pushdown_filters_.empty()) {
+      rowkey_vid_scan_param_.op_filters_ = &ctdef->pd_expr_spec_.pushdown_filters_;
+    }
+    rowkey_vid_scan_param_.pd_storage_filters_ = rtdef->p_pd_expr_op_->pd_storage_filters_;
+    // Guarded by OB_SUCC(ret): an unconditional OB_FAIL() here would overwrite the
+    // null-snapshot error recorded above with the result of assign().
+    if (OB_SUCC(ret) && OB_FAIL(rowkey_vid_scan_param_.column_ids_.assign(ctdef->access_column_ids_))) {
+      LOG_WARN("failed to assign column ids", K(ret));
+    }
+    if (rtdef->sample_info_ != nullptr) {
+      rowkey_vid_scan_param_.sample_info_ = *rtdef->sample_info_;
+    }
+  }
+
+  LOG_INFO("init rowkey vid table scan param finished", K(rowkey_vid_scan_param_), K(ret));
+  return ret;
+}
+
+// Copies the data table's key ranges / skip-scan ranges into the rowkey-vid scan
+// param (retargeted at the rowkey-vid table id) and decides whether the two
+// streams can be concatenated positionally or must be joined by rowkey.
+int ObDASVIdMergeIter::build_rowkey_vid_range()
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(data_table_iter_) || OB_ISNULL(data_table_ctdef_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpeted error, data table iter or ctdef is nullptr", K(ret), KP(data_table_iter_), KP(data_table_ctdef_));
+  } else {
+    const common::ObIArray<ObNewRange> &key_ranges = data_table_iter_->get_scan_param().key_ranges_;
+    const common::ObIArray<ObNewRange> &ss_key_ranges = data_table_iter_->get_scan_param().ss_key_ranges_;
+    for (int64_t i = 0; OB_SUCC(ret) && i < key_ranges.count(); ++i) {
+      ObNewRange key_range = key_ranges.at(i);
+      key_range.table_id_ = rowkey_vid_scan_param_.index_id_;
+      if (OB_FAIL(rowkey_vid_scan_param_.key_ranges_.push_back(key_range))) {
+        LOG_WARN("fail to push back key range for rowkey vid scan param", K(ret), K(key_range));
+      }
+    }
+    for (int64_t i = 0; OB_SUCC(ret) && i < ss_key_ranges.count(); ++i) {
+      ObNewRange ss_key_range = ss_key_ranges.at(i);
+      ss_key_range.table_id_ = rowkey_vid_scan_param_.index_id_;
+      if (OB_FAIL(rowkey_vid_scan_param_.ss_key_ranges_.push_back(ss_key_range))) {
+        LOG_WARN("fail to push back ss key range for rowkey vid scan param", K(ret), K(ss_key_range));
+      }
+    }
+  }
+
+  if (OB_SUCC(ret)) {
+    const ObExprPtrIArray *op_filters = data_table_iter_->get_scan_param().op_filters_;
+    // Filters on the data table mean its output is a subset of the rowkey-vid rows,
+    // so the rowkey-vid stream has to be filtered by rowkey comparison.
+    need_filter_rowkey_vid_ = OB_NOT_NULL(op_filters) && !op_filters->empty();
+    rowkey_vid_scan_param_.sample_info_ = data_table_iter_->get_scan_param().sample_info_;
+  }
+  LOG_INFO("build rowkey vid range", K(ret), K(need_filter_rowkey_vid_), K(rowkey_vid_scan_param_.key_ranges_),
+      K(rowkey_vid_scan_param_.ss_key_ranges_), K(rowkey_vid_scan_param_.sample_info_));
+  return ret;
+}
+
+// Positional merge of one row: fetches the next row from both iterators and
+// writes the vid into the data table row. Either side ending before the other
+// is reported as OB_ERR_UNEXPECTED.
+int ObDASVIdMergeIter::concat_row()
+{
+  int ret = OB_SUCCESS;
+  int64_t vid_id = 0;
+  if (OB_FAIL(data_table_iter_->get_next_row())) {
+    if (OB_ITER_END == ret) {
+      if (OB_FAIL(rowkey_vid_iter_->get_next_row())) {
+        if (OB_UNLIKELY(OB_ITER_END != ret)) {
+          LOG_WARN("fail to get next rows", K(ret));
+        }
+      } else {
+        ObArenaAllocator allocator("RowkeyVid");
+        common::ObRowkey rowkey;
+        if (OB_FAIL(get_rowkey(allocator, rowkey_vid_ctdef_, rowkey_vid_rtdef_, rowkey))) {
+          LOG_WARN("fail to process_data_table_rowkey", K(ret));
+        } else {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("row count isn't equal between data table and rowkey doc", K(ret), K(rowkey),
+              K(rowkey_vid_iter_->get_scan_param()), K(data_table_iter_->get_scan_param()));
+        }
+      }
+    } else {
+      LOG_WARN("fail to get next row", K(ret));
+    }
+  } else if (OB_FAIL(rowkey_vid_iter_->get_next_row())) {
+    if (OB_ITER_END == ret) {
+      // The data table produced a row but rowkey-vid is exhausted. Returning
+      // OB_ITER_END here would silently end the scan, so report the mismatch the
+      // same way sorted_merge_join_row() does.
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected error, The row count of data table isn't equal to rowkey vid", K(ret));
+    } else {
+      LOG_WARN("fail to get next row", K(ret));
+    }
+  } else if (OB_FAIL(get_vid_id(rowkey_vid_ctdef_, rowkey_vid_rtdef_, vid_id))) {
+    LOG_WARN("fail to get vid id", K(ret));
+  } else if (OB_FAIL(fill_vid_id_in_data_table(vid_id))) {
+    LOG_WARN("fail to fill vid id in data table", K(ret), K(vid_id));
+  }
+  return ret;
+}
+
+// Positional merge of a batch: reads up to `capacity` data table rows, then reads
+// rowkey-vid rows until the same number has been collected (or until iter end
+// when the data table reported iter end) and fills the vids into the batch.
+int ObDASVIdMergeIter::concat_rows(int64_t &count, int64_t capacity)
+{
+  int ret = OB_SUCCESS;
+  int64_t data_row_cnt = 0;
+  int64_t rowkey_vid_row_cnt = 0;
+  common::ObArray<int64_t> vid_ids;
+  if (OB_FAIL(data_table_iter_->get_next_rows(data_row_cnt, capacity))) {
+    if (ret != OB_ITER_END) {
+      LOG_WARN("fail to get next row", K(ret));
+    }
+  }
+  if (OB_FAIL(ret) && ret != OB_ITER_END) {
+  } else { // whatever succ or iter_end, we should get from rowkey_vid_iter
+    bool expect_iter_end = (ret == OB_ITER_END);
+    // int64_t rather than int: both operands are int64_t and must not be narrowed.
+    int64_t real_cap = (data_row_cnt > 0 && !expect_iter_end) ? data_row_cnt : capacity;
+    ret = OB_SUCCESS; // recover ret from iter end
+    while (OB_SUCC(ret) && (real_cap > 0 || expect_iter_end)) {
+      rowkey_vid_row_cnt = 0;
+      if (OB_FAIL(rowkey_vid_iter_->get_next_rows(rowkey_vid_row_cnt, real_cap))) {
+        if (ret != OB_ITER_END) {
+          LOG_WARN("fail to get next row", K(ret), K(data_row_cnt), K(real_cap), K(vid_ids));
+        }
+      }
+      if (OB_FAIL(ret) && OB_ITER_END != ret) {
+      } else if (rowkey_vid_row_cnt > 0) {
+        // Keep a possible OB_ITER_END across the call below.
+        const int tmp_ret = ret;
+        if (OB_FAIL(get_vid_ids(rowkey_vid_row_cnt, rowkey_vid_ctdef_, rowkey_vid_rtdef_, vid_ids))) {
+          LOG_WARN("fail to get vid ids", K(ret), K(count));
+        } else {
+          ret = tmp_ret;
+        }
+      }
+      real_cap -= rowkey_vid_row_cnt;
+    }
+    if (expect_iter_end && ret != OB_ITER_END) {
+      int tmp_ret = ret;
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("row count isn't equal between data table and rowkey vid",
+          K(ret), K(tmp_ret), K(capacity), K(vid_ids.count()), K(data_row_cnt));
+    }
+  }
+  if (OB_FAIL(ret) && OB_ITER_END != ret) {
+  } else if (OB_UNLIKELY(data_row_cnt != vid_ids.count())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("The row count of data table isn't equal to rowkey vid", K(ret), K(data_row_cnt), K(vid_ids),
+        K(data_table_iter_->get_scan_param()), K(rowkey_vid_iter_->get_scan_param()));
+  } else {
+    count = data_row_cnt;
+    if (count > 0 && data_table_ctdef_->vec_vid_idx_ != -1) {
+      // Keep a possible OB_ITER_END across the call below.
+      const int tmp_ret = ret;
+      if (OB_FAIL(fill_vid_ids_in_data_table(vid_ids))) {
+        LOG_WARN("fail to fill vid ids in data table", K(ret), K(tmp_ret), K(vid_ids));
+      } else {
+        ret = tmp_ret;
+      }
+    }
+  }
+  LOG_TRACE("concat rows in data table and rowkey vid", K(ret), K(data_row_cnt), K(vid_ids), K(count),
+      K(capacity));
+  return ret;
+}
+
+// Rowkey merge join of one row: fetches the next data table row, then advances
+// the rowkey-vid iterator until a row with the same rowkey is found and writes
+// its vid into the data table row.
+int ObDASVIdMergeIter::sorted_merge_join_row()
+{
+  int ret = OB_SUCCESS;
+  ObArenaAllocator allocator("DocIdMergeRow");
+  common::ObRowkey data_table_rowkey;
+  if (OB_FAIL(data_table_iter_->get_next_row()) && OB_ITER_END != ret) {
+    LOG_WARN("fail to get next data table row", K(ret));
+  } else if (OB_ITER_END == ret) {
+    // Data table is exhausted: drain the rowkey-vid iterator as well.
+    while (OB_SUCC(rowkey_vid_iter_->get_next_row()));
+    if (OB_ITER_END != ret) {
+      LOG_WARN("fail to get next rowkey vid row", K(ret));
+    }
+  } else if (OB_FAIL(get_rowkey(allocator, data_table_ctdef_, data_table_rtdef_, data_table_rowkey))) {
+    LOG_WARN("fail to get data table rowkey", K(ret));
+  } else {
+    int64_t vid_id = 0;
+    bool is_found = false;
+    while (OB_SUCC(ret) && !is_found) {
+      common::ObRowkey rowkey_vid_rowkey;
+      if (OB_FAIL(rowkey_vid_iter_->get_next_row())) {
+        LOG_WARN("fail to get next rowkey vid row", K(ret));
+      } else if (OB_FAIL(get_rowkey(allocator, rowkey_vid_ctdef_, rowkey_vid_rtdef_, rowkey_vid_rowkey))) {
+        LOG_WARN("fail to get rowkey vid rowkey");
+      } else if (OB_FAIL(rowkey_vid_rowkey.equal(data_table_rowkey, is_found))) {
+        // equal() returns an error code; it must go through OB_FAIL so that ret is set.
+        LOG_WARN("fail to equal rowkey between data table and rowkey", K(ret));
+      }
+      LOG_TRACE("compare one row in rowkey vid", K(ret), "need_skip=", !is_found, K(data_table_rowkey),
+          K(rowkey_vid_rowkey));
+    }
+    if (OB_FAIL(ret)) {
+      if (OB_ITER_END == ret) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected error, The row count of data table isn't equal to rowkey vid", K(ret));
+      }
+    } else if (OB_FAIL(get_vid_id(rowkey_vid_ctdef_, rowkey_vid_rtdef_, vid_id))) {
+      LOG_WARN("fail to get vid id", K(ret));
+    } else if (OB_FAIL(fill_vid_id_in_data_table(vid_id))) {
+      LOG_WARN("fail to fill vid id in data table", K(ret), K(vid_id));
+    }
+  }
+  return ret;
+}
+
+// Rowkey merge join of a batch: reads a batch of data table rows, then reads
+// rowkey-vid batches and, walking both in order, collects the vid of every
+// rowkey-vid row whose rowkey equals the next unmatched data table rowkey.
+int ObDASVIdMergeIter::sorted_merge_join_rows(int64_t &count, int64_t capacity)
+{
+  int ret = OB_SUCCESS;
+  ObArenaAllocator allocator("DocIdMergeRows");
+  common::ObArray<common::ObRowkey> rowkeys_in_data_table;
+  common::ObArray<int64_t> vid_ids;
+  bool is_iter_end = false;
+  int64_t data_table_cnt = 0;
+  if (OB_FAIL(data_table_iter_->get_next_rows(data_table_cnt, capacity)) && OB_ITER_END != ret) {
+    LOG_WARN("fail to get next data table rows", K(ret), K(data_table_cnt), K(capacity), KPC(data_table_iter_));
+  } else if (0 == data_table_cnt && OB_ITER_END == ret) {
+    count = 0;
+  } else if (OB_UNLIKELY(0 == data_table_cnt && OB_SUCCESS == ret)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected error, data table row count is 0, but ret code is success", K(ret), KPC(data_table_iter_));
+  } else if (OB_ITER_END == ret && FALSE_IT(is_iter_end = true)) {
+  } else if (OB_FAIL(get_rowkeys(data_table_cnt, allocator, data_table_ctdef_, data_table_rtdef_,
+          rowkeys_in_data_table))) {
+    LOG_WARN("fail to get data table rowkeys", K(ret), K(data_table_cnt));
+  } else {
+    const int64_t batch_size = is_iter_end ? capacity : data_table_cnt;
+    int64_t remain_cnt = data_table_cnt;
+    int64_t rowkey_vid_cnt = 0;
+    while (OB_SUCC(ret) && remain_cnt > 0) {
+      common::ObArray<common::ObRowkey> rowkeys_in_rowkey_vid;
+      common::ObArray<int64_t> vid_ids_in_rowkey_vid;
+      if (OB_FAIL(rowkey_vid_iter_->get_next_rows(rowkey_vid_cnt, batch_size)) && OB_ITER_END != ret) {
+        LOG_WARN("fail to get next rowkey vid rows", K(ret), K(remain_cnt), K(batch_size), KPC(rowkey_vid_iter_));
+      } else if (OB_UNLIKELY(OB_ITER_END == ret && (!is_iter_end || 0 == rowkey_vid_cnt))) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected error, iter end is reached at rowkey vid, but not at data table", K(ret), K(is_iter_end),
+            K(rowkey_vid_cnt));
+      } else if (OB_FAIL(get_rowkeys_and_vid_ids(rowkey_vid_cnt, allocator, rowkey_vid_ctdef_, rowkey_vid_rtdef_,
+              rowkeys_in_rowkey_vid, vid_ids_in_rowkey_vid))) {
+        LOG_WARN("fail to get rowkey vid rowkeys", K(ret), K(rowkey_vid_cnt));
+      } else {
+        // i walks the unmatched data table rows, j walks the current rowkey-vid batch.
+        for (int64_t i = data_table_cnt - remain_cnt, j = 0;
+             OB_SUCC(ret) && i < data_table_cnt && j < rowkeys_in_rowkey_vid.count();
+             ++j) {
+          bool is_equal = false;
+          // Per-row logging stays at TRACE level, as in sorted_merge_join_row().
+          LOG_TRACE("compare one row in rowkey vid", K(ret), K(i), K(j), K(rowkeys_in_data_table.at(i)),
+              K(rowkeys_in_rowkey_vid.at(j)));
+          if (OB_FAIL(rowkeys_in_rowkey_vid.at(j).equal(rowkeys_in_data_table.at(i), is_equal))) {
+            // equal() returns an error code; it must go through OB_FAIL so that ret is set.
+            LOG_WARN("fail to equal rowkey between data table and rowkey", K(ret));
+          } else if (is_equal) {
+            if (OB_FAIL(vid_ids.push_back(vid_ids_in_rowkey_vid.at(j)))) {
+              LOG_WARN("fail to push back vid id", K(ret), K(j), K(vid_ids_in_rowkey_vid));
+            } else {
+              --remain_cnt;
+              ++i;
+              LOG_TRACE("find vid id in rowkey vid", K(vid_ids_in_rowkey_vid.at(j)), K(remain_cnt), K(i), K(data_table_cnt));
+            }
+          }
+        }
+      }
+    }
+    if (FAILEDx(fill_vid_ids_in_data_table(vid_ids))) {
+      LOG_WARN("fail to fill vid ids in data table", K(ret), K(vid_ids));
+    } else {
+      count = data_table_cnt;
+      ret = is_iter_end ? OB_ITER_END : ret;
+    }
+  }
+  return ret;
+}
+
+// Materializes the rowkey of the current row of `ctdef`'s output into `rowkey`.
+// The ObObj array backing the rowkey is allocated from `allocator`.
+int ObDASVIdMergeIter::get_rowkey(
+    common::ObIAllocator &allocator,
+    const ObDASScanCtDef *ctdef,
+    ObDASScanRtDef *rtdef,
+    common::ObRowkey &rowkey)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), KP(ctdef), KP(rtdef));
+  } else {
+    const int64_t rowkey_cnt = ctdef->table_param_.get_read_info().get_schema_rowkey_count();
+    void *buf = nullptr;
+    if (OB_ISNULL(buf = allocator.alloc(sizeof(ObObj) * rowkey_cnt))) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("fail to allocate rowkey obj buffer", K(ret), K(rowkey_cnt));
+    } else {
+      ObObj *obj_ptr = new (buf) ObObj[rowkey_cnt];
+      for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_cnt; ++i) {
+        ObExpr *expr = ctdef->result_output_.at(i);
+        if (OB_ISNULL(expr)) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("unexpected error, expr is nullptr", K(ret), K(i), KPC(ctdef));
+        } else if (T_PSEUDO_GROUP_ID == expr->type_) {
+          // nothing to do.
+        } else {
+          ObDatum &datum = expr->locate_expr_datum(*rtdef->eval_ctx_);
+          if (OB_FAIL(datum.to_obj(obj_ptr[i], expr->obj_meta_, expr->obj_datum_map_))) {
+            LOG_WARN("fail to convert datum to obj", K(ret));
+          }
+        }
+      }
+      if (OB_SUCC(ret)) {
+        rowkey.assign(obj_ptr, rowkey_cnt);
+      }
+    }
+  }
+  return ret;
+}
+
+// Reads the vid of the current rowkey-vid row. The vid is the output column that
+// directly follows the rowkey columns in `ctdef->result_output_`.
+int ObDASVIdMergeIter::get_vid_id(
+    const ObDASScanCtDef *ctdef,
+    ObDASScanRtDef *rtdef,
+    int64_t &vid_id)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), KP(ctdef), KP(rtdef));
+  } else {
+    const int64_t rowkey_cnt = ctdef->table_param_.get_read_info().get_schema_rowkey_count();
+    const int64_t extern_size = ctdef->trans_info_expr_ != nullptr ? 1 : 0;
+    ObExpr *expr = nullptr;
+    if (GCONF.enable_strict_defensive_check()) {
+      if (OB_UNLIKELY(ctdef->result_output_.count() != rowkey_cnt + 1 + extern_size)) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected result output column count", K(ret), K(rowkey_cnt), K(ctdef->result_output_.count()));
+      }
+    } else if (OB_UNLIKELY(ctdef->result_output_.count() != rowkey_cnt + 1)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected result output column count", K(ret), K(rowkey_cnt),
+          K(ctdef->result_output_.count()));
+    }
+    if (OB_FAIL(ret)) {
+    } else if (OB_ISNULL(expr = ctdef->result_output_.at(rowkey_cnt))) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected error, vid id expr is nullptr", K(ret), K(rowkey_cnt), K(ctdef->result_output_));
+    } else {
+      ObDatum &datum = expr->locate_expr_datum(*rtdef->eval_ctx_);
+      vid_id = datum.get_int();
+    }
+  }
+  return ret;
+}
+
+// Appends the rowkeys of the first `size` rows of the current batch to `rowkeys`.
+int ObDASVIdMergeIter::get_rowkeys(
+    const int64_t size,
+    common::ObIAllocator &allocator,
+    const ObDASScanCtDef *ctdef,
+    ObDASScanRtDef *rtdef,
+    common::ObIArray<common::ObRowkey> &rowkeys)
+{
+  int ret = OB_SUCCESS;
+  // Validate before the batch guard dereferences rtdef->eval_ctx_.
+  if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef) || OB_ISNULL(rtdef->eval_ctx_)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), KP(ctdef), KP(rtdef));
+  } else {
+    ObEvalCtx::BatchInfoScopeGuard batch_info_guard(*rtdef->eval_ctx_);
+    batch_info_guard.set_batch_size(size);
+    for (int64_t i = 0; OB_SUCC(ret) && i < size; ++i) {
+      batch_info_guard.set_batch_idx(i);
+      common::ObRowkey rowkey;
+      if (OB_FAIL(get_rowkey(allocator, ctdef, rtdef, rowkey))) {
+        LOG_WARN("fail to process_data_table_rowkey", K(ret), K(i));
+      } else if (OB_FAIL(rowkeys.push_back(rowkey))) {
+        LOG_WARN("fail to push back rowkey", K(ret), K(rowkey));
+      }
+    }
+  }
+  return ret;
+}
+
+// Appends the vids of the first `size` rows of the current batch to `vid_ids`.
+int ObDASVIdMergeIter::get_vid_ids(
+    const int64_t size,
+    const ObDASScanCtDef *ctdef,
+    ObDASScanRtDef *rtdef,
+    common::ObIArray<int64_t> &vid_ids)
+{
+  int ret = OB_SUCCESS;
+  // Validate before the batch guard dereferences rtdef->eval_ctx_.
+  if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef) || OB_ISNULL(rtdef->eval_ctx_)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), KP(ctdef), KP(rtdef));
+  } else {
+    ObEvalCtx::BatchInfoScopeGuard batch_info_guard(*rtdef->eval_ctx_);
+    batch_info_guard.set_batch_size(size);
+    for (int64_t i = 0; OB_SUCC(ret) && i < size; ++i) {
+      batch_info_guard.set_batch_idx(i);
+      int64_t vid_id = 0;
+      if (OB_FAIL(get_vid_id(ctdef, rtdef, vid_id))) {
+        LOG_WARN("fail to get vid id", K(ret), K(i));
+      } else if (OB_FAIL(vid_ids.push_back(vid_id))) {
+        LOG_WARN("fail to push back vid id", K(ret), K(vid_id));
+      } else {
+        // Per-row debug output: TRACE level so it does not flood the INFO log.
+        LOG_TRACE("[vec index debug]get one vid ", K(vid_id));
+      }
+    }
+  }
+  return ret;
+}
+
+// Appends both the rowkey and the vid of the first `size` rows of the current
+// batch to `rowkeys` and `vid_ids` (same index in both arrays).
+int ObDASVIdMergeIter::get_rowkeys_and_vid_ids(
+    const int64_t size,
+    common::ObIAllocator &allocator,
+    const ObDASScanCtDef *ctdef,
+    ObDASScanRtDef *rtdef,
+    common::ObIArray<common::ObRowkey> &rowkeys,
+    common::ObIArray<int64_t> &vid_ids)
+{
+  int ret = OB_SUCCESS;
+  // Validate before the batch guard dereferences rtdef->eval_ctx_.
+  if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef) || OB_ISNULL(rtdef->eval_ctx_)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), KP(ctdef), KP(rtdef));
+  } else {
+    ObEvalCtx::BatchInfoScopeGuard batch_info_guard(*rtdef->eval_ctx_);
+    batch_info_guard.set_batch_size(size);
+    for (int64_t i = 0; OB_SUCC(ret) && i < size; ++i) {
+      batch_info_guard.set_batch_idx(i);
+      common::ObRowkey rowkey;
+      int64_t vid_id = 0;
+      if (OB_FAIL(get_rowkey(allocator, ctdef, rtdef, rowkey))) {
+        LOG_WARN("fail to process_data_table_rowkey", K(ret), K(i));
+      } else if (OB_FAIL(rowkeys.push_back(rowkey))) {
+        LOG_WARN("fail to push back rowkey", K(ret), K(rowkey));
+      } else if (OB_FAIL(get_vid_id(ctdef, rtdef, vid_id))) {
+        LOG_WARN("fail to get vid id", K(ret), K(i));
+      } else if (OB_FAIL(vid_ids.push_back(vid_id))) {
+        LOG_WARN("fail to push back vid id", K(ret), K(vid_id));
+      }
+    }
+  }
+  return ret;
+}
+
+// Writes `vid_id` into the vid output column (vec_vid_idx_) of the current data
+// table row and marks that expression as evaluated.
+int ObDASVIdMergeIter::fill_vid_id_in_data_table(const int64_t &vid_id)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(data_table_ctdef_) || OB_ISNULL(data_table_rtdef_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpeted error, data table ctdef is nullptr", K(ret), KP(data_table_ctdef_), KP(data_table_rtdef_));
+  } else {
+    const int64_t vid_id_idx = data_table_ctdef_->vec_vid_idx_;
+    ObExpr *vid_id_expr = nullptr;
+    if (OB_UNLIKELY(vid_id_idx >= data_table_ctdef_->result_output_.count() || vid_id_idx < 0)) {
+      ret = OB_INVALID_ARGUMENT;
+      LOG_WARN("invalid vid id idx", K(ret), K(vid_id_idx), K(data_table_ctdef_->result_output_.count()));
+    } else if (OB_ISNULL(vid_id_expr = data_table_ctdef_->result_output_.at(vid_id_idx))) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpeted error, vid id expr is nullptr", K(ret), K(vid_id_idx), KPC(data_table_ctdef_));
+    } else {
+      // The value is stored with set_int(), exactly as the batch variant does; the
+      // extra string-result buffer that used to be allocated here was never written.
+      ObDatum &datum = vid_id_expr->locate_datum_for_write(*data_table_rtdef_->eval_ctx_);
+      datum.set_int(vid_id);
+      vid_id_expr->set_evaluated_projected(*data_table_rtdef_->eval_ctx_);
+      LOG_TRACE("Doc id merge fill a vidument id", K(vid_id));
+    }
+  }
+  return ret;
+}
+
+// Writes `vid_ids[i]` into the vid output column of the i-th row of the current
+// data table batch and marks that expression as evaluated. An empty `vid_ids`
+// is rejected with OB_INVALID_ARGUMENT.
+int ObDASVIdMergeIter::fill_vid_ids_in_data_table(const common::ObIArray<int64_t> &vid_ids)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(0 == vid_ids.count())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), K(vid_ids));
+  } else if (OB_ISNULL(data_table_ctdef_) || OB_ISNULL(data_table_rtdef_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpeted error, data table ctdef is nullptr", K(ret), KP(data_table_ctdef_), KP(data_table_rtdef_));
+  } else {
+    const int64_t vid_id_idx = data_table_ctdef_->vec_vid_idx_;
+    ObExpr *vid_id_expr = nullptr;
+    ObDatum *datums = nullptr;
+    if (OB_UNLIKELY(vid_id_idx >= data_table_ctdef_->result_output_.count() || vid_id_idx < 0)) {
+      ret = OB_INVALID_ARGUMENT;
+      LOG_WARN("invalid vid id idx", K(ret), K(vid_id_idx), K(data_table_ctdef_->result_output_.count()));
+    } else if (OB_ISNULL(vid_id_expr = data_table_ctdef_->result_output_.at(vid_id_idx))) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected error, vid id expr is nullptr", K(ret), K(vid_id_idx), KPC(data_table_ctdef_));
+    } else if (OB_ISNULL(datums = vid_id_expr->locate_datums_for_update(*data_table_rtdef_->eval_ctx_, vid_ids.count()))) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected error, datums is nullptr", K(ret), KPC(vid_id_expr));
+    } else {
+      for (int64_t i = 0; i < vid_ids.count(); ++i) {
+        datums[i].set_int(vid_ids.at(i));
+      }
+      vid_id_expr->set_evaluated_projected(*data_table_rtdef_->eval_ctx_);
+    }
+  }
+  return ret;
+}
+
+} // end namespace sql
+} // end namespace oceanbase
\ No newline at end of file
diff --git a/src/sql/das/iter/ob_das_vid_merge_iter.h b/src/sql/das/iter/ob_das_vid_merge_iter.h
new file mode 100644
index 0000000000..94a19d9e61
--- /dev/null
+++ b/src/sql/das/iter/ob_das_vid_merge_iter.h
@@ -0,0 +1,166 @@
+/**
+ * Copyright (c) 2024 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OB_DAS_VID_MERGE_ITER_H_
+#define OB_DAS_VID_MERGE_ITER_H_
+
+#include "sql/das/iter/ob_das_iter.h"
+#include "sql/das/iter/ob_das_scan_iter.h"
+#include "common/ob_tablet_id.h"
+#include "share/ob_ls_id.h"
+#include "storage/access/ob_dml_param.h"
+
+namespace oceanbase
+{
+namespace sql
+{
+
+// Both types are defined with the `struct` key in ob_das_attach_define.h; the
+// forward declarations use the same key to avoid mismatched-tag warnings.
+struct ObDASVIdMergeCtDef;
+struct ObDASVIdMergeRtDef;
+
+// Initialization parameters of ObDASVIdMergeIter. None of the pointers is owned.
+class ObDASVIdMergeIterParam final : public ObDASIterParam
+{
+public:
+  ObDASVIdMergeIterParam();
+  ~ObDASVIdMergeIterParam();
+
+  // Valid when the rowkey-vid tablet/ls ids are valid and every pointer except
+  // trans_desc_ is set.
+  virtual bool is_valid() const override
+  {
+    return rowkey_vid_tablet_id_.is_valid()
+        && rowkey_vid_ls_id_.is_valid()
+        && nullptr != rowkey_vid_iter_
+        && nullptr != data_table_iter_
+        && nullptr != rowkey_vid_ctdef_
+        && nullptr != data_table_ctdef_
+        && nullptr != rowkey_vid_rtdef_
+        && nullptr != data_table_rtdef_
+        && nullptr != snapshot_;
+  }
+  INHERIT_TO_STRING_KV("ObDASIterParam", ObDASIterParam,
+                       K(rowkey_vid_tablet_id_),
+                       K(rowkey_vid_ls_id_),
+                       KPC(rowkey_vid_iter_),
+                       KPC(data_table_iter_),
+                       KPC(rowkey_vid_ctdef_),
+                       KPC(data_table_ctdef_),
+                       KPC(rowkey_vid_rtdef_),
+                       KPC(data_table_rtdef_),
+                       KPC(trans_desc_),
+                       KPC(snapshot_));
+public:
+  common::ObTabletID rowkey_vid_tablet_id_;
+  share::ObLSID rowkey_vid_ls_id_;
+  ObDASScanIter *rowkey_vid_iter_;
+  ObDASScanIter *data_table_iter_;
+  ObDASScanCtDef *rowkey_vid_ctdef_;
+  ObDASScanCtDef *data_table_ctdef_;
+  ObDASScanRtDef *rowkey_vid_rtdef_;
+  ObDASScanRtDef *data_table_rtdef_;
+  transaction::ObTxDesc *trans_desc_;
+  transaction::ObTxReadSnapshot *snapshot_;
+};
+
+/**
+ * DAS Iter Tree of DAS Scan with Vid:
+ *
+ * CASE 1: Partition Scan Tree                          CASE 2: Index LookUp Tree
+ *
+ *              VID_MERGE_ITER                                   DAS_INDEX_LOOKUP_ITER
+ *               /          \                                     /              \
+ *              /            \                                   /                \
+ * DAS_SCAN_ITER(DataTable) DAS_SCAN_ITER(RowkeyVid)   DAS_SCAN_ITER(IndexTable) VID_MERGE_ITER
+ *                                                                                /          \
+ *                                                                               /            \
+ *                                                         DAS_SCAN_ITER(DataTable) DAS_SCAN_ITER(RowkeyVid)
+ **/
+// Merges a data table scan with a scan of the rowkey-vid auxiliary table and
+// fills the vid column of every data table row that is produced.
+class ObDASVIdMergeIter final : public ObDASIter
+{
+public:
+  ObDASVIdMergeIter();
+  ~ObDASVIdMergeIter();
+
+  storage::ObTableScanParam &get_rowkey_vid_scan_param() { return rowkey_vid_scan_param_; }
+  virtual int do_table_scan() override;
+  virtual int rescan() override;
+  virtual void clear_evaluated_flag() override;
+
+  ObDASScanIter *get_data_table_iter() { return data_table_iter_; }
+  // Sets the rowkey-vid tablet id / ls id that the next rescan() will use.
+  int set_vid_merge_related_ids(const ObDASRelatedTabletID &tablet_ids, const share::ObLSID &ls_id);
+  INHERIT_TO_STRING_KV("ObDASIter", ObDASIter,
+                       K(rowkey_vid_scan_param_),
+                       KPC(rowkey_vid_iter_),
+                       KPC(data_table_iter_));
+protected:
+  virtual int inner_init(ObDASIterParam &param) override;
+  virtual int inner_reuse() override;
+  virtual int inner_release() override;
+  virtual int inner_get_next_row() override;
+  virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override;
+  // Dereferences merge_memctx_, which inner_init() creates and inner_release() destroys.
+  common::ObArenaAllocator &get_arena_allocator() { return merge_memctx_->get_arena_allocator(); }
+  int init_rowkey_vid_scan_param(
+      const common::ObTabletID &tablet_id,
+      const share::ObLSID &ls_id,
+      const ObDASScanCtDef *ctdef,
+      ObDASScanRtDef *rtdef,
+      transaction::ObTxDesc *trans_desc,
+      transaction::ObTxReadSnapshot *snapshot);
+  int build_rowkey_vid_range();
+  // Positional merge, used when the data table scan has no filters.
+  int concat_row();
+  int concat_rows(int64_t &count, int64_t capacity);
+  // Merge by rowkey comparison, used when the data table scan has filters.
+  int sorted_merge_join_row();
+  int sorted_merge_join_rows(int64_t &count, int64_t capacity);
+  int get_rowkey(
+      common::ObIAllocator &allocator,
+      const ObDASScanCtDef *ctdef,
+      ObDASScanRtDef *rtdef,
+      common::ObRowkey &rowkey);
+  int get_vid_id(
+      const ObDASScanCtDef *ctdef,
+      ObDASScanRtDef *rtdef,
+      int64_t &vid_id);
+  int get_rowkeys(
+      const int64_t size,
+      common::ObIAllocator &allocator,
+      const ObDASScanCtDef *ctdef,
+      ObDASScanRtDef *rtdef,
+      common::ObIArray<common::ObRowkey> &rowkeys);
+  int get_vid_ids(
+      const int64_t size,
+      const ObDASScanCtDef *ctdef,
+      ObDASScanRtDef *rtdef,
+      common::ObIArray<int64_t> &vid_ids);
+  int get_rowkeys_and_vid_ids(
+      const int64_t size,
+      common::ObIAllocator &allocator,
+      const ObDASScanCtDef *ctdef,
+      ObDASScanRtDef *rtdef,
+      common::ObIArray<common::ObRowkey> &rowkeys,
+      common::ObIArray<int64_t> &vid_ids);
+  int fill_vid_id_in_data_table(const int64_t &vid_id);
+  int fill_vid_ids_in_data_table(const common::ObIArray<int64_t> &vid_ids);
+private:
+  bool need_filter_rowkey_vid_;
+  storage::ObTableScanParam rowkey_vid_scan_param_;
+  ObDASScanIter *rowkey_vid_iter_;
+  ObDASScanIter *data_table_iter_;
+  const ObDASScanCtDef *rowkey_vid_ctdef_;
+  const ObDASScanCtDef *data_table_ctdef_;
+  ObDASScanRtDef *rowkey_vid_rtdef_;
+  ObDASScanRtDef *data_table_rtdef_;
+  ObTabletID rowkey_vid_tablet_id_;
+  share::ObLSID rowkey_vid_ls_id_;
+  lib::MemoryContext merge_memctx_;
+};
+
+} // end namespace sql
+} // end namespace oceanbase
+#endif // OB_DAS_VID_MERGE_ITER_H_
\ No newline at end of file
diff --git a/src/sql/das/ob_das_attach_define.cpp b/src/sql/das/ob_das_attach_define.cpp index 4ab1cb75d1..cc7b78fa76 100644 --- a/src/sql/das/ob_das_attach_define.cpp +++ b/src/sql/das/ob_das_attach_define.cpp @@ -59,6 +59,10 @@ OB_SERIALIZE_MEMBER((ObDASSortCtDef, ObDASAttachCtDef), OB_SERIALIZE_MEMBER((ObDASSortRtDef, ObDASAttachRtDef)); +OB_SERIALIZE_MEMBER((ObDASVIdMergeCtDef, ObDASAttachCtDef)); + +OB_SERIALIZE_MEMBER((ObDASVIdMergeRtDef, ObDASAttachRtDef)); + OB_DEF_SERIALIZE(ObDASAttachSpec) { int ret = OB_SUCCESS; diff --git a/src/sql/das/ob_das_attach_define.h b/src/sql/das/ob_das_attach_define.h index f90e918554..fa85b9524a 100644 --- a/src/sql/das/ob_das_attach_define.h +++ b/src/sql/das/ob_das_attach_define.h @@ -121,6 +121,29 @@ public: virtual ~ObDASSortRtDef() {} }; +struct ObDASVIdMergeCtDef final : ObDASAttachCtDef +{ + OB_UNIS_VERSION(1); +public: +
ObDASVIdMergeCtDef(common::ObIAllocator &alloc) + : ObDASAttachCtDef(alloc, DAS_OP_VID_MERGE) + {} + ~ObDASVIdMergeCtDef() = default; + INHERIT_TO_STRING_KV("ObDASVIdMergeCtDef", ObDASAttachCtDef, KP(this)); + +}; + +struct ObDASVIdMergeRtDef final : ObDASAttachRtDef +{ + OB_UNIS_VERSION(1); +public: + ObDASVIdMergeRtDef() + : ObDASAttachRtDef(DAS_OP_VID_MERGE) + {} + ~ObDASVIdMergeRtDef() = default; + INHERIT_TO_STRING_KV("ObDASVIdMergeRtDef", ObDASAttachRtDef, KP(this)); +}; + struct ObDASAttachSpec { OB_UNIS_VERSION(1); diff --git a/src/sql/das/ob_das_def_reg.h b/src/sql/das/ob_das_def_reg.h index 7b8a34524d..279a737f97 100644 --- a/src/sql/das/ob_das_def_reg.h +++ b/src/sql/das/ob_das_def_reg.h @@ -130,6 +130,10 @@ struct ObDASIRScanCtDef; struct ObDASIRScanRtDef; REGISTER_DAS_ATTACH_OP(DAS_OP_IR_SCAN, ObDASIRScanCtDef, ObDASIRScanRtDef); +struct ObDASVecAuxScanCtDef; +struct ObDASVecAuxScanRtDef; +REGISTER_DAS_ATTACH_OP(DAS_OP_VEC_SCAN, ObDASVecAuxScanCtDef, ObDASVecAuxScanRtDef); + struct ObDASIRAuxLookupCtDef; struct ObDASIRAuxLookupRtDef; REGISTER_DAS_ATTACH_OP(DAS_OP_IR_AUX_LOOKUP, ObDASIRAuxLookupCtDef, ObDASIRAuxLookupRtDef); @@ -138,6 +142,11 @@ struct ObDASSortCtDef; struct ObDASSortRtDef; REGISTER_DAS_ATTACH_OP(DAS_OP_SORT, ObDASSortCtDef, ObDASSortRtDef); +struct ObDASVIdMergeCtDef; +struct ObDASVIdMergeRtDef; +REGISTER_DAS_ATTACH_OP(DAS_OP_VID_MERGE, ObDASVIdMergeCtDef, ObDASVIdMergeRtDef); + + #undef REGISTER_DAS_ATTACH_OP } // namespace sql } // namespace oceanbase diff --git a/src/sql/das/ob_das_define.h b/src/sql/das/ob_das_define.h index 3f8705d7b4..6cd2e9794b 100644 --- a/src/sql/das/ob_das_define.h +++ b/src/sql/das/ob_das_define.h @@ -83,6 +83,8 @@ enum ObDASOpType DAS_OP_IR_SCAN, DAS_OP_IR_AUX_LOOKUP, DAS_OP_SORT, + DAS_OP_VEC_SCAN, + DAS_OP_VID_MERGE, //append OpType before me DAS_OP_MAX }; @@ -419,13 +421,17 @@ OB_INLINE ObDuplicateType loc_meta_to_duplicate_type(const ObDASTableLocMeta &lo return dup_type; } -enum ObTSCIRScanType : 
uint8_t +enum ObTSCIRScanType : uint16_t { OB_NOT_A_SPEC_SCAN = 0, OB_IR_DOC_ID_IDX_AGG, OB_IR_INV_IDX_AGG, OB_IR_INV_IDX_SCAN, OB_IR_FWD_IDX_AGG, + OB_VEC_DELTA_BUF_SCAN, + OB_VEC_IDX_ID_SCAN, + OB_VEC_SNAPSHOT_SCAN, + OB_VEC_COM_AUX_SCAN }; } // namespace sql diff --git a/src/sql/das/ob_das_delete_op.cpp b/src/sql/das/ob_das_delete_op.cpp index b2fc093328..e45ae12954 100644 --- a/src/sql/das/ob_das_delete_op.cpp +++ b/src/sql/das/ob_das_delete_op.cpp @@ -49,7 +49,16 @@ int ObDASIndexDMLAdaptor::write_rows(cons { int ret = OB_SUCCESS; ObAccessService *as = MTL(ObAccessService *); - if (ctdef.table_param_.get_data_table().is_mlog_table() + if (OB_UNLIKELY(ctdef.table_param_.get_data_table().is_vector_delta_buffer() && + !ctdef.is_access_mlog_as_master_table_)) { + // for vector delta buffer, only do insert when DML with main table + if (OB_FAIL(as->insert_rows(ls_id, tablet_id, *tx_desc_, dml_param_, + ctdef.column_ids_, &iter, affected_rows))) { + if (OB_TRY_LOCK_ROW_CONFLICT != ret) { + LOG_WARN("insert rows to access service failed", K(ret), K(ls_id), K(tablet_id)); + } + } + } else if (ctdef.table_param_.get_data_table().is_mlog_table() && !ctdef.is_access_mlog_as_master_table_) { ObDASMLogDMLIterator mlog_iter(tablet_id, dml_param_, &iter, DAS_OP_TABLE_DELETE); if (OB_FAIL(as->insert_rows(ls_id, diff --git a/src/sql/das/ob_das_dml_ctx_define.cpp b/src/sql/das/ob_das_dml_ctx_define.cpp index bd841970c6..8afde090e1 100644 --- a/src/sql/das/ob_das_dml_ctx_define.cpp +++ b/src/sql/das/ob_das_dml_ctx_define.cpp @@ -148,7 +148,8 @@ int ObDASDMLIterator::get_next_row(blocksstable::ObDatumRow *&datum_row) } if (OB_SUCC(ret)) { - if (das_ctdef_->table_param_.get_data_table().is_domain_index()) { + if (das_ctdef_->table_param_.get_data_table().is_domain_index() + && !das_ctdef_->is_access_vidx_as_master_table_) { if (OB_FAIL(get_next_domain_index_row(datum_row))) { if (OB_ITER_END != ret) { LOG_WARN("get next domain index row", K(ret), 
K(das_ctdef_->table_param_.get_data_table())); @@ -196,7 +197,7 @@ int ObDASDMLIterator::get_next_rows(blocksstable::ObDatumRow *&rows, int64_t &ro LOG_WARN("Failed to begin write iterator", K(ret)); } } - if (OB_SUCC(ret) && is_domain_index) { + if (OB_SUCC(ret) && is_domain_index && !das_ctdef_->is_access_vidx_as_master_table_) { if (OB_FAIL(get_next_domain_index_rows(rows, row_count))) { LOG_WARN("fail to get next domain index rows", K(ret)); } diff --git a/src/sql/das/ob_das_dml_ctx_define.h b/src/sql/das/ob_das_dml_ctx_define.h index d38d797109..1c55642ea3 100644 --- a/src/sql/das/ob_das_dml_ctx_define.h +++ b/src/sql/das/ob_das_dml_ctx_define.h @@ -85,9 +85,10 @@ public: uint64_t is_insert_up_ : 1; uint64_t is_table_api_ : 1; uint64_t is_access_mlog_as_master_table_ : 1; + uint64_t is_access_vidx_as_master_table_ : 1; uint64_t is_update_partition_key_ : 1; uint64_t is_update_uk_ : 1; - uint64_t reserved_ : 56; + uint64_t reserved_ : 55; }; }; protected: diff --git a/src/sql/das/ob_das_dml_vec_iter.cpp b/src/sql/das/ob_das_dml_vec_iter.cpp new file mode 100644 index 0000000000..859dba9729 --- /dev/null +++ b/src/sql/das/ob_das_dml_vec_iter.cpp @@ -0,0 +1,251 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+#define USING_LOG_PREFIX SQL_DAS
+
+#include "sql/das/ob_das_dml_vec_iter.h"
+#include "sql/das/ob_das_utils.h"
+#include "sql/engine/expr/ob_expr_lob_utils.h"
+#include "storage/blocksstable/ob_datum_row_utils.h"
+
+using namespace oceanbase::common;
+
+namespace oceanbase
+{
+namespace sql
+{
+
+// Build the delta-buffer row(s) of one stored row into rows_: one row per loop pass,
+// two passes for an update (is_old_row_ is flipped after each pass), one otherwise.
+// Index-id and snapshot tables produce no rows here and report OB_ITER_END.
+int ObVecIndexDMLIterator::generate_domain_rows(const ObChunkDatumStore::StoredRow *store_row)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(store_row)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), KP(store_row));
+  } else if (OB_UNLIKELY(!das_ctdef_->table_param_.get_data_table().is_vector_index())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected error, it isn't vector index", K(ret), K(das_ctdef_->table_param_.get_data_table()));
+  } else if (das_ctdef_->table_param_.get_data_table().is_vector_index_id() ||
+             das_ctdef_->table_param_.get_data_table().is_vector_index_snapshot()) {
+    ret = OB_ITER_END; // for 4, 5 table, do not need to write when DML
+  } else {
+    int64_t vec_id = 0;
+    ObString vector;
+    bool is_update = (das_ctdef_->op_type_ == ObDASOpType::DAS_OP_TABLE_UPDATE);
+    int64_t row_cnt = is_update ? 2 : 1;
+    int64_t vec_id_idx = OB_INVALID_INDEX;
+    int64_t type_idx = OB_INVALID_INDEX;
+    int64_t vector_idx = OB_INVALID_INDEX;
+    if (OB_FAIL(get_vector_index_column_idxs(vec_id_idx, type_idx, vector_idx))) {
+      LOG_WARN("fail to get vector index col idx", K(ret));
+    }
+    // NOTE(review): is_update_ is not declared in this file (presumably inherited from
+    // ObDomainDMLIterator); confirm it always agrees with the local is_update.
+    for (int i = 0; OB_SUCC(ret) && i < row_cnt; i++) {
+      if (!is_update_ && OB_FAIL(get_vec_data(store_row, vec_id_idx, vector_idx, vec_id, vector))) {
+        LOG_WARN("fail to get vec id and vector", K(ret), K(vec_id_idx), K(vector_idx), KPC(store_row));
+      } else if (is_update_ && OB_FAIL(get_vec_data_for_update(store_row, vec_id_idx, vector_idx, vec_id, vector))) {
+        LOG_WARN("fail to get vec id and vector for update", K(ret), K(vec_id_idx), K(vector_idx), KPC(store_row));
+      } else if (OB_FAIL(generate_vec_delta_buff_row(allocator_, store_row, vec_id_idx, type_idx, vector_idx, vec_id, vector, rows_))) {
+        LOG_WARN("fail to generate vec delta buff rows", K(ret), K(vec_id_idx), K(type_idx), K(vector_idx), K(vec_id), KPC(store_row), K(rows_), KPC(main_ctdef_));
+      } else if (is_update) {
+        is_old_row_ = !is_old_row_;
+      }
+    }
+  }
+  LOG_DEBUG("generate vector index delta buffer rows", K(ret), K(rows_), KPC(store_row));
+  return ret;
+}
+
+// Read the vid and the vector of a non-update row through row_projector_. For deletes
+// only the stored datum is taken; the real lob data is not read.
+int ObVecIndexDMLIterator::get_vec_data(
+    const ObChunkDatumStore::StoredRow *store_row,
+    const int64_t vec_id_idx,
+    const int64_t vector_idx,
+    int64_t &vec_id,
+    ObString &vector)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(store_row) || OB_ISNULL(row_projector_)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), KP(store_row), KP(row_projector_));
+  } else if (OB_UNLIKELY(vec_id_idx >= row_projector_->count() || vector_idx >= row_projector_->count())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid vector index column idx", K(ret), K(vec_id_idx), K(vector_idx), KPC(row_projector_));
+  } else {
+    vec_id = store_row->cells()[row_projector_->at(vec_id_idx)].get_int();
+    // the stored datum is expected to always carry a lob header; real data is read below
+    vector = store_row->cells()[row_projector_->at(vector_idx)].get_string();
+    if (das_ctdef_->op_type_ == ObDASOpType::DAS_OP_TABLE_DELETE) {
+      // do nothing, delete do not need to read vector
+    } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(
+                   &allocator_, ObLongTextType, CS_TYPE_BINARY, true, vector))) {
+      LOG_WARN("fail to get real data.", K(ret), K(vector));
+    } else {
+      LOG_DEBUG("succeed to get vector id and vector", K(vec_id), K(store_row->cells()[row_projector_->at(vector_idx)]));
+    }
+  }
+  return ret;
+}
+
+// Read the vid and the vector for an update through the ctdef's old/new row projectors:
+// the old image when is_old_row_ is set, the new image otherwise.
+int ObVecIndexDMLIterator::get_vec_data_for_update(
+    const ObChunkDatumStore::StoredRow *store_row,
+    const int64_t vec_id_idx,
+    const int64_t vector_idx,
+    int64_t &vec_id,
+    ObString &vector)
+{
+  int ret = OB_SUCCESS;
+  const uint64_t old_proj_cnt = das_ctdef_->old_row_projector_.count();
+  const uint64_t new_proj_cnt = das_ctdef_->new_row_projector_.count();
+  if (OB_ISNULL(store_row)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), KP(store_row));
+  } else if (OB_UNLIKELY(vec_id_idx >= old_proj_cnt || vector_idx >= old_proj_cnt || vec_id_idx >= new_proj_cnt || vector_idx >= new_proj_cnt)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid vector index column idx", K(ret), K(vec_id_idx), K(vector_idx), K(old_proj_cnt), K(new_proj_cnt));
+  } else {
+    // get vec id
+    const int64_t vec_id_old_proj_idx = das_ctdef_->old_row_projector_.at(vec_id_idx);
+    const int64_t vec_id_new_proj_idx = das_ctdef_->new_row_projector_.at(vec_id_idx);
+    int64_t old_vec_id = store_row->cells()[vec_id_old_proj_idx].get_int();
+    int64_t new_vec_id = store_row->cells()[vec_id_new_proj_idx].get_int();
+    vec_id = is_old_row_ ? old_vec_id : new_vec_id;
+    // get vec data; the stored datum is expected to always carry a lob header
+    const int64_t vector_old_proj_idx = das_ctdef_->old_row_projector_.at(vector_idx);
+    const int64_t vector_new_proj_idx = das_ctdef_->new_row_projector_.at(vector_idx);
+    const int64_t vector_proj_idx = is_old_row_ ? vector_old_proj_idx : vector_new_proj_idx;
+    vector = store_row->cells()[vector_proj_idx].get_string();
+    if (OB_FAIL(ObTextStringHelper::read_real_string_data(
+            &allocator_, ObLongTextType, CS_TYPE_BINARY, true, vector))) {
+      LOG_WARN("fail to get real data.", K(ret), K(vector));
+    } else {
+      // log the cell that was actually read; row_projector_ is neither checked nor used on this path
+      LOG_DEBUG("succeed to get vector data", K(vec_id), K(old_vec_id), K(new_vec_id), K(is_old_row_),
+          K(store_row->cells()[vector_proj_idx]));
+    }
+  }
+  return ret;
+}
+
+// Project store_row into a freshly created datum row, overwrite its vid/type/vector
+// columns and append it to rows. The type column gets VEC_DELTA_DELETE for a delete and
+// for the old image of an update, VEC_DELTA_INSERT for an insert and the new image.
+// NOTE(review): the allocator parameter is unused; the row is created from allocator_.
+int ObVecIndexDMLIterator::generate_vec_delta_buff_row(common::ObIAllocator &allocator,
+                                                       const ObChunkDatumStore::StoredRow *store_row,
+                                                       const int64_t vec_id_idx,
+                                                       const int64_t type_idx,
+                                                       const int64_t vector_idx,
+                                                       const int64_t &vec_id,
+                                                       ObString &vector,
+                                                       ObDomainIndexRow &rows)
+{
+  int ret = OB_SUCCESS;
+  // for delta buffer table
+  // [part keys][vid(doc id)][type(char)][vector]
+  bool is_update = (das_ctdef_->op_type_ == ObDASOpType::DAS_OP_TABLE_UPDATE);
+  const IntFixedArray* row_projector = is_update ?
+                                       (is_old_row_ ? &das_ctdef_->old_row_projector_ : &das_ctdef_->new_row_projector_) :
+                                       row_projector_;
+  blocksstable::ObDatumRow *row = nullptr;
+  if (OB_ISNULL(store_row) || OB_ISNULL(row_projector)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), KP(store_row), KP(row_projector));
+  } else if (OB_UNLIKELY(vec_id_idx >= row_projector->count() || vector_idx >= row_projector->count() || type_idx >= row_projector->count())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid vector index column idx", K(ret), K(vec_id_idx), K(type_idx), K(vector_idx), KPC(row_projector));
+  } else if (OB_FAIL(blocksstable::ObDatumRowUtils::ob_create_row(allocator_, row_projector->count(), row))) {
+    LOG_WARN("create current row failed", K(ret), K(is_update), K(is_old_row_), KPC(row_projector));
+  } else if (OB_FAIL(ObDASUtils::project_storage_row(
+                 *das_ctdef_, *store_row, *row_projector, allocator_, *row))) {
+    LOG_WARN("project storage row failed", K(ret));
+  } else {
+    blocksstable::ObStorageDatum *obj_arr = row->storage_datums_;
+    obj_arr[vec_id_idx].set_int(vec_id);
+    ObString ins(1, VEC_DELTA_INSERT); // "I"
+    ObString del(1, VEC_DELTA_DELETE); // "D"
+    switch (das_ctdef_->op_type_) {
+      case ObDASOpType::DAS_OP_TABLE_DELETE: {
+        obj_arr[type_idx].set_string(del);
+        break;
+      }
+      case ObDASOpType::DAS_OP_TABLE_UPDATE: {
+        obj_arr[type_idx].set_string(is_old_row_ ? del : ins);
+        break;
+      }
+      case ObDASOpType::DAS_OP_TABLE_INSERT: {
+        obj_arr[type_idx].set_string(ins);
+        break;
+      }
+      default: {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("invalid das op type", K(ret), K(das_ctdef_->op_type_));
+      }
+    }
+    if (!is_old_row_) {
+      obj_arr[vector_idx].set_string(vector);
+    } else {
+      obj_arr[vector_idx].set_null(); // set vector column is null, do not need to write actual data
+    }
+    if (OB_FAIL(ret)) {
+    } else if (OB_FAIL(rows.push_back(row))) {
+      LOG_WARN("fail to push back row", K(ret), KPC(row));
+    } else {
+      LOG_DEBUG("succeed add delta buffer row", K(ret), K(das_ctdef_->op_type_), K(vec_id), K(is_old_row_));
+    }
+  }
+  return ret;
+}
+
+// Find the positions of the vid, type and vector columns among the table param's column
+// descriptors. The type column id is taken to be the vector column id minus one.
+// Fails with OB_ERR_UNEXPECTED when any of the three columns is not found.
+int ObVecIndexDMLIterator::get_vector_index_column_idxs(int64_t &vec_id_idx, int64_t &type_idx, int64_t &vector_idx)
+{
+  int ret = OB_SUCCESS;
+  // expect must be [vid] [type][vector][scn]
+  const uint64_t vec_id_col_id = das_ctdef_->table_param_.get_data_table().get_vec_id_col_id();
+  const uint64_t vec_vector_col_id = das_ctdef_->table_param_.get_data_table().get_vec_vector_col_id();
+  const uint64_t vec_type_col_id = vec_vector_col_id - 1;
+  if (OB_UNLIKELY(OB_INVALID_ID == vec_id_col_id || OB_INVALID_ID == vec_type_col_id || OB_INVALID_ID == vec_vector_col_id)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid vector index column id", K(ret), K(vec_id_col_id), K(vec_type_col_id), K(vec_vector_col_id));
+  } else {
+    vec_id_idx = OB_INVALID_INDEX;
+    type_idx = OB_INVALID_INDEX;
+    vector_idx = OB_INVALID_INDEX;
+    for (int64_t i = 0; i < das_ctdef_->table_param_.get_col_descs().count(); i++) {
+      uint64_t col_id = das_ctdef_->table_param_.get_col_descs().at(i).col_id_;
+      if (col_id == vec_id_col_id) {
+        vec_id_idx = i;
+      } else if (col_id == vec_type_col_id) {
+        type_idx = i;
+      } else if (col_id == vec_vector_col_id) {
+        vector_idx = i;
+      }
+    }
+    if (OB_UNLIKELY(vec_id_idx == OB_INVALID_INDEX || type_idx == OB_INVALID_INDEX || vector_idx == OB_INVALID_INDEX)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("not get vec index column idxs", K(ret), K(vec_id_col_id), K(vec_type_col_id), K(vec_vector_col_id),
+          K(vec_id_idx), K(type_idx), K(vector_idx));
+    }
+  }
+  return ret;
+}
+
+} // end namespace sql
+} // end namespace oceanbase
diff --git a/src/sql/das/ob_das_dml_vec_iter.h
b/src/sql/das/ob_das_dml_vec_iter.h new file mode 100644 index 0000000000..7f48db7b5a --- /dev/null +++ b/src/sql/das/ob_das_dml_vec_iter.h @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_DAS_DML_VEC_ITER_H +#define OCEANBASE_DAS_DML_VEC_ITER_H + +#include "src/sql/das/ob_das_domain_utils.h" + +namespace oceanbase +{ +namespace sql +{ + +class ObVecIndexDMLIterator final : public ObDomainDMLIterator +{ +public: + static constexpr char* VEC_DELTA_INSERT = const_cast("I"); + static constexpr char* VEC_DELTA_DELETE = const_cast("D"); + ObVecIndexDMLIterator( + common::ObIAllocator &allocator, + const IntFixedArray *row_projector, + ObDASWriteBuffer::Iterator &write_iter, + const ObDASDMLBaseCtDef *das_ctdef, + const ObDASDMLBaseCtDef *main_ctdef) + : ObDomainDMLIterator(allocator, row_projector, write_iter, das_ctdef, main_ctdef), + is_old_row_(das_ctdef_->op_type_ == ObDASOpType::DAS_OP_TABLE_UPDATE) + {} + virtual ~ObVecIndexDMLIterator() = default; + INHERIT_TO_STRING_KV("ObDomainDMLIterator", ObDomainDMLIterator, K_(is_old_row)); +protected: + int get_vec_data( + const ObChunkDatumStore::StoredRow *store_row, + const int64_t vec_id_idx, + const int64_t vector_idx, + int64_t &vec_id, + ObString &vector); + int get_vec_data_for_update( + const ObChunkDatumStore::StoredRow *store_row, + const int64_t vec_id_idx, + const int64_t vector_idx, + int64_t &vec_id, + ObString &vector); +private: + virtual int generate_domain_rows(const 
ObChunkDatumStore::StoredRow *store_row) override; + int generate_vec_delta_buff_row(common::ObIAllocator &allocator, + const ObChunkDatumStore::StoredRow *store_row, + const int64_t vec_id_idx, + const int64_t type_idx, + const int64_t vector_idx, + const int64_t &vec_id, + ObString &vector, + ObDomainIndexRow &rows); + int get_vector_index_column_idxs(int64_t &vec_id_idx, int64_t &type_idx, int64_t &vector_idx); +private: + bool is_old_row_; +}; + + +} // end namespace sql +} // end namespace oceanbase + +#endif // OCEANBASE_DAS_DML_VEC_ITER_H diff --git a/src/sql/das/ob_das_domain_utils.cpp b/src/sql/das/ob_das_domain_utils.cpp index a62781e6da..01065c1404 100644 --- a/src/sql/das/ob_das_domain_utils.cpp +++ b/src/sql/das/ob_das_domain_utils.cpp @@ -17,6 +17,7 @@ #include "lib/json_type/ob_json_bin.h" #include "sql/das/ob_das_domain_utils.h" #include "sql/das/ob_das_utils.h" +#include "sql/das/ob_das_dml_vec_iter.h" #include "sql/engine/expr/ob_expr_lob_utils.h" #include "observer/omt/ob_tenant_srs.h" #include "storage/blocksstable/ob_datum_row_utils.h" @@ -434,6 +435,15 @@ int ObDASDomainUtils::generate_multivalue_index_rows(ObIAllocator &allocator, ObMultivalueDMLIterator *iter = new (buf) ObMultivalueDMLIterator(allocator, row_projector, write_iter, das_ctdef, main_ctdef); domain_iter = static_cast(iter); } + } else if (das_ctdef->table_param_.get_data_table().is_vector_index()) { + void *buf = nullptr; + if (OB_ISNULL(buf = allocator.alloc(sizeof(ObVecIndexDMLIterator)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate fulltext dml iterator memory", K(ret), KP(buf)); + } else { + ObVecIndexDMLIterator *iter = new (buf) ObVecIndexDMLIterator(allocator, row_projector, write_iter, das_ctdef, main_ctdef); + domain_iter = static_cast(iter); + } } else { ret = OB_NOT_SUPPORTED; LOG_WARN("not supported domain index type", K(ret), K(das_ctdef->table_param_.get_data_table())); diff --git a/src/sql/das/ob_das_factory.cpp 
b/src/sql/das/ob_das_factory.cpp index 61beb9700b..6c887835b9 100644 --- a/src/sql/das/ob_das_factory.cpp +++ b/src/sql/das/ob_das_factory.cpp @@ -24,6 +24,7 @@ #include "sql/das/ob_das_ref.h" #include "sql/das/ob_das_attach_define.h" #include "sql/das/ob_das_ir_define.h" +#include "sql/das/ob_das_vec_define.h" #include "share/datum/ob_datum_util.h" #define STORE_DAS_OBJ(obj_store, das_obj, class_name) \ diff --git a/src/sql/das/ob_das_scan_op.cpp b/src/sql/das/ob_das_scan_op.cpp index b65b182383..4ca4fdeb67 100644 --- a/src/sql/das/ob_das_scan_op.cpp +++ b/src/sql/das/ob_das_scan_op.cpp @@ -14,7 +14,7 @@ #include "sql/das/ob_das_scan_op.h" #include "sql/das/ob_das_extra_data.h" #include "sql/das/ob_das_spatial_index_lookup_op.h" -#include "sql/das/ob_domain_index_lookup_op.h" +#include "sql/das/ob_vector_index_lookup_op.h" #include "sql/das/ob_das_utils.h" #include "sql/engine/table/ob_table_scan_op.h" #include "sql/engine/px/ob_px_util.h" @@ -333,11 +333,12 @@ ObDASIterTreeType ObDASScanOp::get_iter_tree_type() const bool is_fts_index = scan_param_.table_param_->is_fts_index() && attach_ctdef_ != nullptr; bool is_spatial_index = scan_param_.table_param_->is_spatial_index(); bool is_multivalue_index = scan_param_.table_param_->is_multivalue_index(); + bool is_vector_index = scan_param_.table_param_->is_vec_index(); if (is_fts_index) { tree_type = ObDASIterTreeType::ITER_TREE_TEXT_RETRIEVAL; } else if (is_spatial_index) { tree_type = ObDASIterTreeType::ITER_TREE_GIS_LOOKUP; - } else if (is_multivalue_index) { + } else if (is_multivalue_index || is_vector_index) { tree_type = ObDASIterTreeType::ITER_TREE_DOMAIN_LOOKUP; } else { tree_type = OB_ISNULL(get_lookup_ctdef()) ? 
ObDASIterTreeType::ITER_TREE_PARTITION_SCAN
@@ -353,6 +354,8 @@ int ObDASScanOp::init_related_tablet_ids(ObDASRelatedTabletID &related_tablet_id
   if (OB_FAIL(get_table_lookup_tablet_id(related_tablet_ids.lookup_tablet_id_))) {
     LOG_WARN("failed to get table lookup tablet id", K(ret));
   } else if (OB_ISNULL(attach_ctdef_) || OB_ISNULL(attach_rtdef_)) { // no attached task.
+  } else if (OB_FAIL(get_rowkey_vid_tablet_id(related_tablet_ids.rowkey_vid_tablet_id_))) {
+    LOG_WARN("fail to get rowkey vid tablet id", K(ret));
   } else if (OB_FAIL(get_aux_lookup_tablet_id(related_tablet_ids.aux_lookup_tablet_id_))) {
     LOG_WARN("failed to get aux lookup tablet id", K(ret));
   } else if (OB_FAIL(get_text_ir_tablet_ids(related_tablet_ids.inv_idx_tablet_id_,
@@ -363,6 +366,82 @@ int ObDASScanOp::init_related_tablet_ids(ObDASRelatedTabletID &related_tablet_id
   return ret;
 }
 
+// Rescan entry for vector index scans: refresh the tablet ids bound to the vector
+// lookup op and re-initialise its limit/sort/index parameters. Without a table lookup
+// ctdef only the related tablet ids and the vec scan definition are resolved.
+int ObDASScanOp::do_vec_index_rescan()
+{
+  int ret = OB_SUCCESS;
+  if (nullptr == result_ || (result_->get_type() != ObNewRowIterator::IterType::ObLocalIndexLookupIterator)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected vector index rescan status", K(ret), KP_(result));
+  } else {
+    ObTabletID vec_row_tid;
+    ObTabletID delta_buf_tid;
+    ObTabletID index_id_tid;
+    ObTabletID snapshot_tid;
+    ObTabletID aux_lookup_tablet_id;
+    ObTabletID lookup_tablet_id;
+    ObTabletID com_aux_vec_tablet_id; // main
+    const ObDASVecAuxScanCtDef *vir_scan_ctdef = nullptr;
+    ObDASVecAuxScanRtDef *vir_scan_rtdef = nullptr;
+    const ObDASIRAuxLookupCtDef *aux_lookup_ctdef = nullptr;
+    ObDASIRAuxLookupRtDef *aux_lookup_rtdef = nullptr;
+    const bool has_lookup = nullptr != get_lookup_ctdef();
+    ObVectorIndexLookupOp *vec_op = has_lookup
+                                    ? static_cast<ObVectorIndexLookupOp *>(result_)
+                                    : nullptr;
+    // resolve the related tablet ids and the attached definitions first
+    if (OB_FAIL(get_vec_ir_tablet_ids(vec_row_tid, delta_buf_tid,
+                                      index_id_tid, snapshot_tid,
+                                      com_aux_vec_tablet_id))) {
+      LOG_WARN("failed to get aux tablet id", K(ret), K_(related_tablet_ids));
+    } else if (OB_FAIL(ObDASUtils::find_target_das_def(
+                   attach_ctdef_, attach_rtdef_, DAS_OP_VEC_SCAN, vir_scan_ctdef, vir_scan_rtdef))) {
+      LOG_WARN("find vec scan definition failed", K(ret));
+    } else if (!has_lookup) {
+      // skip
+    } else if (OB_ISNULL(vec_op)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected nullptr to vector lookup op", K(ret), KPC(result_));
+    } else if (OB_FAIL(get_aux_lookup_tablet_id(aux_lookup_tablet_id))) {
+      LOG_WARN("failed to get vec id idx tablet id", K(ret), K_(related_tablet_ids));
+    } else if (OB_FAIL(ObDASUtils::find_target_das_def(
+                   attach_ctdef_, attach_rtdef_, DAS_OP_IR_AUX_LOOKUP, aux_lookup_ctdef, aux_lookup_rtdef))) {
+      LOG_WARN("find aux lookup definition failed", K(ret));
+    } else if (OB_FAIL(get_table_lookup_tablet_id(lookup_tablet_id))) {
+      LOG_WARN("failed to get lookup tablet id", K(ret), K_(related_tablet_ids));
+    } else {
+      vec_op->set_tablet_id(lookup_tablet_id);
+      vec_op->set_doc_id_idx_tablet_id(aux_lookup_tablet_id);
+      vec_op->set_ls_id(ls_id_);
+      // NOTE(review): do_domain_index_lookup() passes the com-aux tablet id as the 4th argument;
+      // here lookup_tablet_id is passed and com_aux_vec_tablet_id stays unused - confirm intent.
+      vec_op->set_aux_table_id(delta_buf_tid, index_id_tid, snapshot_tid, lookup_tablet_id);
+    }
+
+    // sort/limit/index parameters are refreshed only when the doc-id scan is a sort node
+    if (OB_FAIL(ret) || OB_ISNULL(vec_op)) {
+      // nothing to refresh
+    } else if (OB_ISNULL(aux_lookup_ctdef) || OB_ISNULL(aux_lookup_rtdef) || OB_ISNULL(aux_lookup_ctdef->get_doc_id_scan_ctdef())) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected nullptr to aux lookup definition", K(ret), KP(aux_lookup_ctdef), KP(aux_lookup_rtdef));
+    } else if (DAS_OP_SORT == aux_lookup_ctdef->get_doc_id_scan_ctdef()->op_type_) {
+      vec_op->set_sort_ctdef(static_cast<const ObDASSortCtDef *>(aux_lookup_ctdef->get_doc_id_scan_ctdef()));
+      vec_op->set_sort_rtdef(static_cast<ObDASSortRtDef *>(aux_lookup_rtdef->get_doc_id_scan_rtdef()));
+      vec_op->set_dim(vir_scan_ctdef->dim_);
+      if (OB_FAIL(vec_op->init_limit(vir_scan_ctdef, vir_scan_rtdef))) {
+        LOG_WARN("failed to init limit", K(ret), KPC(vir_scan_ctdef), KPC(vir_scan_rtdef));
+      } else if (OB_FAIL(vec_op->init_sort(vir_scan_ctdef, vir_scan_rtdef))) {
+        LOG_WARN("failed to init sort", K(ret), KPC(vir_scan_ctdef), KPC(vir_scan_rtdef));
+      } else if (OB_FAIL(vec_op->set_vec_index_param(vir_scan_ctdef->vec_index_param_))) {
+        LOG_WARN("failed to set vec index param", K(ret));
+      }
+    }
+  }
+  return ret;
+}
+
 int ObDASScanOp::open_op()
 {
   int ret = OB_SUCCESS;
@@ -536,7 +615,7 @@ int ObDASScanOp::do_local_index_lookup()
 {
   int ret = OB_SUCCESS;
   ObTabletID lookup_tablet_id;
-  if (scan_param_.table_param_->is_multivalue_index()) {
+  if (scan_param_.table_param_->is_multivalue_index() || scan_param_.table_param_->is_vec_index()) {
     if (OB_FAIL(do_domain_index_lookup())) {
       LOG_WARN("failed to do domain index lookup", K(ret));
     }
@@ -607,6 +686,36 @@ int ObDASScanOp::do_domain_index_lookup()
       op->set_ls_id(ls_id_);
     }
 
+  } else if (scan_param_.table_param_->is_vec_index()) {
+    ObVectorIndexLookupOp *op = nullptr;
+    ObTabletID doc_id_idx_tablet_id;
+    ObTabletID vec_row_tid;
+    ObTabletID delta_buf_tid;
+    ObTabletID index_id_tid;
+    ObTabletID snapshot_tid;
+    ObTabletID com_aux_vec_tid;
+    if (OB_FAIL(get_aux_lookup_tablet_id(doc_id_idx_tablet_id))) {
+      LOG_WARN("failed to get doc id idx tablet id", K(ret), K_(related_tablet_ids));
+    } else if (OB_FAIL(get_vec_ir_tablet_ids(vec_row_tid, delta_buf_tid,
+                                             index_id_tid, snapshot_tid,
+                                             com_aux_vec_tid))) {
+      LOG_WARN("failed to get aux tablet id", K(ret), K_(related_tablet_ids));
+    } else if (OB_ISNULL(op = OB_NEWx(ObVectorIndexLookupOp, &op_alloc_, op_alloc_))) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("failed to allocate vector index lookup op", K(ret));
+    } else if (FALSE_IT(op->set_aux_lookup_iter(result_))) {
+    } else if (FALSE_IT(result_ = op)) {
+    } else if (FALSE_IT(op->set_aux_table_id(delta_buf_tid, index_id_tid, snapshot_tid, com_aux_vec_tid))) {
+    } else if (OB_FAIL(op->init(attach_ctdef_, attach_rtdef_, trans_desc_, snapshot_, scan_param_))) {
+      LOG_WARN("failed to init vector index lookup op", K(ret));
+    } else if (OB_FAIL(get_table_lookup_tablet_id(lookup_tablet_id))) {
+      // the error code must be propagated: the tablet id is bound to the op below
+      LOG_WARN("failed to get lookup tablet id", K(ret), K_(related_tablet_ids));
+    } else {
+      op->set_tablet_id(lookup_tablet_id);
+      op->set_doc_id_idx_tablet_id(doc_id_idx_tablet_id);
+      op->set_ls_id(ls_id_);
+    }
   } else {
     ret = OB_NOT_SUPPORTED;
     LOG_WARN("unsupported domain index type", K(ret));
@@ -845,7 +954,11 @@ int ObDASScanOp::rescan()
   } else {
     ObLocalIndexLookupOp *lookup_op = get_lookup_op();
     ObTabletID lookup_tablet_id;
-    if (OB_FAIL(tsc_service.table_rescan(scan_param_, get_storage_scan_iter()))) {
+    if (scan_param_.table_param_->is_vec_index() && attach_ctdef_ != nullptr) {
+      if (OB_FAIL(do_vec_index_rescan())) {
+        LOG_WARN("failed to do vector index rescan", K(ret));
+      }
+    } else if (OB_FAIL(tsc_service.table_rescan(scan_param_, get_storage_scan_iter()))) {
       LOG_WARN("rescan the table iterator failed", K(ret));
     } else if (lookup_op != nullptr) {
       OZ(get_table_lookup_tablet_id(lookup_tablet_id));
@@ -878,6 +991,7 @@ int ObDASScanOp::reuse_iter()
       LOG_WARN("unexpected nullptr das iter tree", K(ret));
     } else {
       ObDASIter *result = static_cast<ObDASIter *>(result_);
+      ObDASVIdMergeIter *vid_merge_iter = nullptr;
       if (OB_FAIL(init_related_tablet_ids(tablet_ids_))) {
         LOG_WARN("fail to init related tablet ids", K(ret));
       } else {
@@ -908,10 +1022,19 @@ int ObDASScanOp::reuse_iter()
           }
         }
 
-        if (OB_SUCC(ret) && OB_FAIL(result->reuse())) {
+        if (FAILEDx(result->get_vid_merge_iter(vid_merge_iter))) {
+          LOG_WARN("fail to get vid merge iter", K(ret));
+        } else if (OB_NOT_NULL(vid_merge_iter) && OB_FAIL(vid_merge_iter->set_vid_merge_related_ids(tablet_ids_, ls_id_))) {
+          LOG_WARN("fail to set vid merge related ids", K(ret));
+        } else if (OB_FAIL(result->reuse())) {
           LOG_WARN("failed to reuse das iter tree", K(ret));
         }
       }
+    } else if (scan_param_.table_param_->is_vec_index() && attach_ctdef_ != nullptr) {
+      if (nullptr != lookup_op
+          && OB_FAIL(static_cast<ObVectorIndexLookupOp *>(lookup_op)->reuse_scan_iter(scan_param_.need_switch_param_))) {
+        LOG_WARN("failed to reuse vector lookup iters", K(ret));
+      }
     } else if
(scan_param_.table_param_->is_multivalue_index() && attach_ctdef_ != nullptr) {
       if (nullptr != lookup_op
           && OB_FAIL(static_cast<ObMulValueIndexLookupOp*>(lookup_op)->reuse_scan_iter(scan_param_.need_switch_param_))) {
@@ -1002,6 +1125,47 @@ int ObDASScanOp::set_related_task_info(const ObDASBaseCtDef *lookup_ctdef,
   return ret;
 }
 
+// Resolve the rowkey-vid tablet id from the 2nd child of the vid-merge ctdef; tablet_id stays reset when absent.
+int ObDASScanOp::get_rowkey_vid_tablet_id(common::ObTabletID &tablet_id) const
+{
+  int ret = OB_SUCCESS;
+  const ObDASScanCtDef *rowkey_vid_ctdef = nullptr;
+  tablet_id.reset();
+  if (nullptr != attach_ctdef_) {
+    if (ObDASOpType::DAS_OP_VID_MERGE == attach_ctdef_->op_type_) {
+      const ObDASVIdMergeCtDef *ctdef = static_cast<const ObDASVIdMergeCtDef *>(attach_ctdef_);
+      if (OB_UNLIKELY(2 != ctdef->children_cnt_) || OB_ISNULL(ctdef->children_)) {
+        ret = OB_INVALID_ARGUMENT;
+        LOG_WARN("unexpected error, children of vid id merge ctdef is invalid", K(ret), KPC(ctdef));
+      } else {
+        rowkey_vid_ctdef = static_cast<const ObDASScanCtDef *>(ctdef->children_[1]);
+      }
+    } else if (DAS_OP_TABLE_LOOKUP == attach_ctdef_->op_type_) {
+      if (OB_UNLIKELY(2 != attach_ctdef_->children_cnt_) || OB_ISNULL(attach_ctdef_->children_)) {
+        ret = OB_INVALID_ARGUMENT;
+        LOG_WARN("unexpected error, children of table lookup ctdef is invalid", K(ret), KPC(attach_ctdef_));
+      } else if (OB_NOT_NULL(attach_ctdef_->children_[1]) && DAS_OP_VID_MERGE == attach_ctdef_->children_[1]->op_type_) {
+        const ObDASVIdMergeCtDef *vid_merge_ctdef = static_cast<const ObDASVIdMergeCtDef *>(attach_ctdef_->children_[1]);
+        if (OB_UNLIKELY(2 != vid_merge_ctdef->children_cnt_) || OB_ISNULL(vid_merge_ctdef->children_)) {
+          ret = OB_INVALID_ARGUMENT;
+          LOG_WARN("unexpected error, children of vid id merge ctdef is invalid", K(ret), KPC(vid_merge_ctdef));
+        } else {
+          rowkey_vid_ctdef = static_cast<const ObDASScanCtDef *>(vid_merge_ctdef->children_[1]);
+        }
+      }
+    }
+  }
+  if (OB_SUCC(ret) && OB_NOT_NULL(rowkey_vid_ctdef)) {
+    for (int64_t i = 0; !tablet_id.is_valid() && i < related_ctdefs_.count() && i < related_tablet_ids_.count(); ++i) {
+      if (rowkey_vid_ctdef == related_ctdefs_.at(i)) {
+        tablet_id = related_tablet_ids_.at(i);
+      }
+    }
+  }
+  LOG_DEBUG("get rowkey vid tablet id", K(ret), K(tablet_id), KP(rowkey_vid_ctdef), K(related_ctdefs_), KPC(attach_ctdef_));
+  return ret;
+}
+
 int ObDASScanOp::get_aux_lookup_tablet_id(common::ObTabletID &tablet_id) const
 {
   int ret = OB_SUCCESS;
@@ -1037,6 +1201,58 @@ int ObDASScanOp::get_table_lookup_tablet_id(common::ObTabletID &tablet_id) const
   return ret;
 }
 
+// Collect the tablet ids of the vector-index related scans by the ir_scan_type_ of each
+// related ctdef. All outputs are reset first; a type that is absent leaves its id reset.
+int ObDASScanOp::get_vec_ir_tablet_ids(
+    common::ObTabletID &vec_row_tid,
+    common::ObTabletID &delta_buf_tid,
+    common::ObTabletID &index_id_tid,
+    common::ObTabletID &snapshot_tid,
+    common::ObTabletID &com_aux_vec_tid)
+{
+  int ret = OB_SUCCESS;
+  vec_row_tid.reset();
+  delta_buf_tid.reset();
+  index_id_tid.reset();
+  snapshot_tid.reset();
+  com_aux_vec_tid.reset();
+  if (OB_UNLIKELY(related_ctdefs_.count() != related_tablet_ids_.count())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected related scan array not match", K(ret), K_(related_ctdefs), K_(related_tablet_ids));
+  }
+  for (int64_t i= 0; OB_SUCC(ret) && i < related_ctdefs_.count(); ++i) {
+    const ObDASScanCtDef *ctdef = static_cast<const ObDASScanCtDef *>(related_ctdefs_.at(i));
+    if (OB_ISNULL(ctdef)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected nullptr to related scan ctdef", K(ret), K(i));
+    } else {
+      switch (ctdef->ir_scan_type_) {
+        case ObTSCIRScanType::OB_NOT_A_SPEC_SCAN:
+          break;
+        case ObTSCIRScanType::OB_IR_INV_IDX_SCAN:
+          vec_row_tid = related_tablet_ids_.at(i);
+          break;
+        case ObTSCIRScanType::OB_VEC_DELTA_BUF_SCAN:
+          delta_buf_tid = related_tablet_ids_.at(i);
+          break;
+        case ObTSCIRScanType::OB_VEC_IDX_ID_SCAN:
+          index_id_tid = related_tablet_ids_.at(i);
+          break;
+        case ObTSCIRScanType::OB_VEC_SNAPSHOT_SCAN:
+          snapshot_tid = related_tablet_ids_.at(i);
+          break;
+        case ObTSCIRScanType::OB_VEC_COM_AUX_SCAN:
+          com_aux_vec_tid = related_tablet_ids_.at(i);
+          break;
+        default:
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("unexpected ir scan type", K(ret), KPC(ctdef));
+      }
+    }
+  }
+  return ret;
+}
+
 int ObDASScanOp::get_text_ir_tablet_ids(
     common::ObTabletID &inv_idx_tablet_id,
     common::ObTabletID &fwd_idx_tablet_id,
diff --git a/src/sql/das/ob_das_scan_op.h b/src/sql/das/ob_das_scan_op.h index 130d94820c..acb2b68151 100644 --- a/src/sql/das/ob_das_scan_op.h +++ b/src/sql/das/ob_das_scan_op.h @@ -90,7 +90,8 @@ public: KPC_(trans_info_expr), K_(ir_scan_type), K_(rowkey_exprs), - K_(table_scan_opt)); + K_(table_scan_opt), + K_(vec_vid_idx)); common::ObTableID ref_table_id_; UIntFixedArray access_column_ids_; int64_t schema_version_; @@ -233,7 +234,9 @@ public: ObDASScanRtDef *get_lookup_rtdef(); int get_aux_lookup_tablet_id(common::ObTabletID &tablet_id) const; int get_table_lookup_tablet_id(common::ObTabletID &tablet_id) const; + int get_rowkey_vid_tablet_id(common::ObTabletID &tablet_id) const; int init_scan_param(); + int do_vec_index_rescan(); int rescan(); int reuse_iter(); void reset_access_datums_ptr(int64_t capacity = 0); @@ -245,6 +248,12 @@ public: common::ObTabletID &inv_idx_tablet_id, common::ObTabletID &fwd_idx_tablet_id, common::ObTabletID &doc_id_idx_tablet_id); + int get_vec_ir_tablet_ids( + common::ObTabletID &vec_row_tid, + common::ObTabletID &delta_buf_tid, + common::ObTabletID &index_id_tid, + common::ObTabletID &snapshot_tid, + common::ObTabletID &com_aux_vec_tid); bool enable_rich_format() const { return scan_rtdef_->enable_rich_format(); } INHERIT_TO_STRING_KV("parent", ObIDASTaskOp, KPC_(scan_ctdef), diff --git a/src/sql/das/ob_das_update_op.cpp b/src/sql/das/ob_das_update_op.cpp index fdc89e44b4..810c26e146 100644 --- a/src/sql/das/ob_das_update_op.cpp +++ b/src/sql/das/ob_das_update_op.cpp @@ -222,7 +222,16 @@ int ObDASIndexDMLAdaptor::write_rows(cons { int ret = OB_SUCCESS; ObAccessService *as = MTL(ObAccessService *); - if (OB_UNLIKELY(ctdef.table_param_.get_data_table().is_domain_index())) { + if (OB_UNLIKELY(ctdef.table_param_.get_data_table().is_vector_delta_buffer() && + !ctdef.is_access_mlog_as_master_table_)) { + // for vector delta buffer, only do insert when DML with main table + if (OB_FAIL(as->insert_rows(ls_id, tablet_id, *tx_desc_, 
dml_param_, + ctdef.column_ids_, &iter, affected_rows))) { + if (OB_TRY_LOCK_ROW_CONFLICT != ret) { + LOG_WARN("insert rows to access service failed", K(ret), K(ls_id), K(tablet_id)); + } + } + } else if (OB_UNLIKELY(ctdef.table_param_.get_data_table().is_domain_index())) { if (OB_FAIL(as->delete_rows(ls_id, tablet_id, *tx_desc_, dml_param_, ctdef.column_ids_, &iter, affected_rows))) { if (OB_TRY_LOCK_ROW_CONFLICT != ret) { diff --git a/src/sql/das/ob_das_vec_define.cpp b/src/sql/das/ob_das_vec_define.cpp new file mode 100644 index 0000000000..2b55fba3ec --- /dev/null +++ b/src/sql/das/ob_das_vec_define.cpp @@ -0,0 +1,26 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_DAS +#include "ob_das_vec_define.h" + +namespace oceanbase +{ +namespace sql +{ + +OB_SERIALIZE_MEMBER((ObDASVecAuxScanCtDef, ObDASAttachCtDef), + inv_scan_vec_id_col_, vec_index_param_, dim_); +OB_SERIALIZE_MEMBER(ObDASVecAuxScanRtDef); + +} // sql +} // oceanbase diff --git a/src/sql/das/ob_das_vec_define.h b/src/sql/das/ob_das_vec_define.h new file mode 100644 index 0000000000..2c0f4f0fbf --- /dev/null +++ b/src/sql/das/ob_das_vec_define.h @@ -0,0 +1,180 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OB_DAS_VEC_DEFINE_H_ +#define OB_DAS_VEC_DEFINE_H_ + +#include "ob_das_attach_define.h" + +namespace oceanbase +{ +namespace sql +{ + +enum ObVecAuxTableIdx { + INV_SCAN_IDX = 0, + DELTA_BUFFER_TBL_IDX = 1, + INDEX_ID_TBL_IDX = 2, + SNAPSHOT_TBL_IDX = 3, + COM_AUX_TBL_IDX = 4 +}; + +struct ObDASVecAuxScanCtDef : ObDASAttachCtDef +{ + OB_UNIS_VERSION(1); +public: + ObDASVecAuxScanCtDef(common::ObIAllocator &alloc) + : ObDASAttachCtDef(alloc, DAS_OP_VEC_SCAN), + inv_scan_vec_id_col_(nullptr), + vec_index_param_(), + dim_(0) + { + } + const ObDASScanCtDef *get_inv_idx_scan_ctdef() const + { + const ObDASScanCtDef *idx_scan_ctdef = nullptr; + if (children_cnt_ > 0 && children_ != nullptr) { + idx_scan_ctdef = static_cast(children_[get_inv_scan_idx()]); + } + return idx_scan_ctdef; + } + + const ObDASScanCtDef *get_delta_tbl_ctdef() const + { + const ObDASScanCtDef *delta_tbl_ctdef = nullptr; + const int64_t ctdef_idx = get_delta_tbl_idx(); + if (children_cnt_ > ctdef_idx && ctdef_idx > 0 && children_ != nullptr) { + const ObDASScanCtDef *child = static_cast(children_[ctdef_idx]); + if (child->ir_scan_type_ == ObTSCIRScanType::OB_VEC_DELTA_BUF_SCAN) { + delta_tbl_ctdef = child; + } + } + return delta_tbl_ctdef; + } + const ObDASScanCtDef *get_index_id_tbl_ctdef() const + { + const ObDASScanCtDef *index_id_tbl_ctdef = nullptr; + const int64_t ctdef_idx = get_index_id_tbl_idx(); + if (children_cnt_ > ctdef_idx && ctdef_idx > 0 && children_ != nullptr) { + const ObDASScanCtDef *child = static_cast(children_[ctdef_idx]); + if (child->ir_scan_type_ == ObTSCIRScanType::OB_VEC_IDX_ID_SCAN) { + index_id_tbl_ctdef = 
child; + } + } + return index_id_tbl_ctdef; + } + const ObDASScanCtDef *get_snapshot_tbl_ctdef() const + { + const ObDASScanCtDef *snapshot_tbl_ctdef = nullptr; + const int64_t ctdef_idx = get_snapshot_tbl_idx(); + if (children_cnt_ > ctdef_idx && ctdef_idx > 0 && children_ != nullptr) { + const ObDASScanCtDef *child = static_cast(children_[ctdef_idx]); + if (child->ir_scan_type_ == ObTSCIRScanType::OB_VEC_SNAPSHOT_SCAN) { + snapshot_tbl_ctdef = child; + } + } + return snapshot_tbl_ctdef; + } + + const ObDASScanCtDef *get_com_aux_tbl_ctdef() const + { + const ObDASScanCtDef *com_aux_tbl_ctdef = nullptr; + const int64_t ctdef_idx = get_com_aux_tbl_idx(); + if (children_cnt_ > ctdef_idx && ctdef_idx > 0 && children_ != nullptr) { + const ObDASScanCtDef *child = static_cast(children_[ctdef_idx]); + if (child->ir_scan_type_ == ObTSCIRScanType::OB_VEC_COM_AUX_SCAN) { + com_aux_tbl_ctdef = child; + } + } + return com_aux_tbl_ctdef; + } + + int64_t get_inv_scan_idx() const { return ObVecAuxTableIdx::INV_SCAN_IDX; } + int64_t get_delta_tbl_idx() const { return ObVecAuxTableIdx::DELTA_BUFFER_TBL_IDX; } + int64_t get_index_id_tbl_idx() const { return ObVecAuxTableIdx::INDEX_ID_TBL_IDX; } + int64_t get_snapshot_tbl_idx() const { return ObVecAuxTableIdx::SNAPSHOT_TBL_IDX; } + int64_t get_com_aux_tbl_idx() const { return ObVecAuxTableIdx::COM_AUX_TBL_IDX; } + + INHERIT_TO_STRING_KV("ObDASBaseCtDef", ObDASBaseCtDef, + KPC_(inv_scan_vec_id_col), K_(vec_index_param), K_(dim)); + + ObExpr *inv_scan_vec_id_col_; + ObString vec_index_param_; + int64_t dim_; +}; + +struct ObDASVecAuxScanRtDef : ObDASAttachRtDef +{ + OB_UNIS_VERSION(1); +public: + ObDASVecAuxScanRtDef() + : ObDASAttachRtDef(DAS_OP_VEC_SCAN) {} + + virtual ~ObDASVecAuxScanRtDef() {} + + ObDASScanRtDef *get_inv_idx_scan_rtdef() + { + const ObDASVecAuxScanCtDef *ctdef = static_cast(ctdef_); + const int64_t rtdef_idx = ctdef->get_inv_scan_idx(); + ObDASScanRtDef *idx_scan_rtdef = nullptr; + if (children_cnt_ > rtdef_idx && 
children_ != nullptr) { + idx_scan_rtdef = static_cast(children_[rtdef_idx]); + } + return idx_scan_rtdef; + } + ObDASScanRtDef *get_delta_tbl_rtdef() const + { + const ObDASVecAuxScanCtDef *ctdef = static_cast(ctdef_); + const int64_t rtdef_idx = ctdef->get_delta_tbl_idx(); + ObDASScanRtDef *rtdef = nullptr; + if (children_cnt_ > rtdef_idx && children_ != nullptr) { + rtdef = static_cast(children_[rtdef_idx]); + } + return rtdef; + } + ObDASScanRtDef *get_index_id_tbl_rtdef() const + { + const ObDASVecAuxScanCtDef *ctdef = static_cast(ctdef_); + const int64_t rtdef_idx = ctdef->get_index_id_tbl_idx(); + ObDASScanRtDef *rtdef = nullptr; + if (children_cnt_ > rtdef_idx && children_ != nullptr) { + rtdef = static_cast(children_[rtdef_idx]); + } + return rtdef; + } + ObDASScanRtDef *get_snapshot_tbl_rtdef() const + { + const ObDASVecAuxScanCtDef *ctdef = static_cast(ctdef_); + const int64_t rtdef_idx = ctdef->get_snapshot_tbl_idx(); + ObDASScanRtDef *rtdef = nullptr; + if (children_cnt_ > rtdef_idx && children_ != nullptr) { + rtdef = static_cast(children_[rtdef_idx]); + } + return rtdef; + } + + ObDASScanRtDef *get_com_aux_tbl_rtdef() const + { + const ObDASVecAuxScanCtDef *ctdef = static_cast(ctdef_); + const int64_t rtdef_idx = ctdef->get_com_aux_tbl_idx(); + ObDASScanRtDef *rtdef = nullptr; + if (children_cnt_ > rtdef_idx && children_ != nullptr) { + rtdef = static_cast(children_[rtdef_idx]); + } + return rtdef; + } +}; + +} // namespace sql +} // namespace oceanbase + +#endif diff --git a/src/sql/das/ob_domain_index_lookup_op.cpp b/src/sql/das/ob_domain_index_lookup_op.cpp index 5ec5036fc6..b2f712bd9e 100644 --- a/src/sql/das/ob_domain_index_lookup_op.cpp +++ b/src/sql/das/ob_domain_index_lookup_op.cpp @@ -21,9 +21,9 @@ namespace oceanbase using namespace common; using namespace storage; using namespace transaction; + namespace sql { - int ObDomainIndexLookupOp::init( const ObDASScanCtDef *lookup_ctdef, ObDASScanRtDef *lookup_rtdef, @@ -371,6 +371,7 @@ int 
ObDomainIndexLookupOp::revert_iter() if (OB_NOT_NULL(doc_id_lookup_rtdef_)) { doc_id_scan_param_.need_switch_param_ = false; doc_id_scan_param_.destroy_schema_guard(); + doc_id_scan_param_.~ObTableScanParam(); } if (OB_FAIL(ObLocalIndexLookupOp::revert_iter())) { diff --git a/src/sql/das/ob_domain_index_lookup_op.h b/src/sql/das/ob_domain_index_lookup_op.h index 3858aebe42..9f07fafb24 100644 --- a/src/sql/das/ob_domain_index_lookup_op.h +++ b/src/sql/das/ob_domain_index_lookup_op.h @@ -19,7 +19,6 @@ namespace oceanbase { namespace sql { - class ObDomainRowkeyComp { public: ObDomainRowkeyComp(int &sort_ret) : result_code_(sort_ret) {} @@ -106,7 +105,7 @@ protected: storage::ObTableScanParam doc_id_scan_param_; const ObDASScanCtDef *doc_id_lookup_ctdef_; ObDASScanRtDef *doc_id_lookup_rtdef_; - ObTabletID doc_id_idx_tablet_id_; + ObTabletID doc_id_idx_tablet_id_; // for vector index, it is vec_id_index_tablet_id ObExpr *doc_id_expr_; ObObj doc_id_key_obj_; @@ -178,7 +177,6 @@ private: ObRowkey aux_last_rowkey_; bool is_inited_; }; - } // namespace sql } // namespace oceanbase #endif /* OBDEV_SRC_SQL_DAS_OB_DOMAIN_INDEX_LOOKUP_OP_H_ */ diff --git a/src/sql/das/ob_text_retrieval_op.cpp b/src/sql/das/ob_text_retrieval_op.cpp index 40ca0ad66b..0b8f761d7f 100644 --- a/src/sql/das/ob_text_retrieval_op.cpp +++ b/src/sql/das/ob_text_retrieval_op.cpp @@ -31,7 +31,6 @@ OB_SERIALIZE_MEMBER((ObDASIRScanCtDef, ObDASAttachCtDef), estimated_total_doc_cnt_); OB_SERIALIZE_MEMBER(ObDASIRScanRtDef); - OB_SERIALIZE_MEMBER((ObDASIRAuxLookupCtDef, ObDASAttachCtDef), relevance_proj_col_); diff --git a/src/sql/das/ob_vector_index_lookup_op.cpp b/src/sql/das/ob_vector_index_lookup_op.cpp new file mode 100644 index 0000000000..e590ff2dd4 --- /dev/null +++ b/src/sql/das/ob_vector_index_lookup_op.cpp @@ -0,0 +1,1264 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ +#define USING_LOG_PREFIX SQL_DAS +#include "sql/das/ob_das_scan_op.h" +#include "sql/das/ob_das_ir_define.h" +#include "sql/das/ob_das_vec_define.h" +#include "sql/das/ob_vector_index_lookup_op.h" +#include "sql/das/ob_das_utils.h" +#include "sql/engine/ob_exec_context.h" +#include "storage/access/ob_dml_param.h" +#include "src/sql/engine/expr/ob_expr_lob_utils.h" +#include "src/share/vector_index/ob_vector_index_util.h" +#include "src/storage/access/ob_table_scan_iterator.h" +#include "src/share/schema/ob_tenant_schema_service.h" + +namespace oceanbase +{ +using namespace common; +using namespace storage; +using namespace transaction; +using namespace share; + +namespace sql +{ + +int ObVectorIndexLookupOp::init(const ObDASBaseCtDef *table_lookup_ctdef, + ObDASBaseRtDef *table_lookup_rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + storage::ObTableScanParam &scan_param) +{ + INIT_SUCC(ret); + const ObDASTableLookupCtDef *tbl_lookup_ctdef = nullptr; + ObDASTableLookupRtDef *tbl_lookup_rtdef = nullptr; + const ObDASIRAuxLookupCtDef *aux_lookup_ctdef = nullptr; + ObDASIRAuxLookupRtDef *aux_lookup_rtdef = nullptr; + const ObDASVecAuxScanCtDef *vir_scan_ctdef = nullptr; + ObDASVecAuxScanRtDef *vir_scan_rtdef = nullptr; + if (OB_ISNULL(table_lookup_ctdef) || OB_ISNULL(table_lookup_rtdef)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table lookup param is nullptr", KP(table_lookup_ctdef), KP(table_lookup_rtdef)); + } else if (OB_FAIL(ObDASUtils::find_target_das_def(table_lookup_ctdef, + table_lookup_rtdef, + DAS_OP_TABLE_LOOKUP, + tbl_lookup_ctdef, + tbl_lookup_rtdef))) { + LOG_WARN("find 
data table lookup def failed", K(ret)); + } else if (OB_FAIL(ObDASUtils::find_target_das_def(tbl_lookup_ctdef, + tbl_lookup_rtdef, + DAS_OP_IR_AUX_LOOKUP, + aux_lookup_ctdef, + aux_lookup_rtdef))) { + LOG_WARN("find ir aux lookup def failed", K(ret)); + } else if (OB_FAIL(ObDASUtils::find_target_das_def(aux_lookup_ctdef, + aux_lookup_rtdef, + DAS_OP_VEC_SCAN, + vir_scan_ctdef, + vir_scan_rtdef))) { + LOG_WARN("find ir scan def failed", K(ret)); + } else { + if (OB_FAIL(ObDomainIndexLookupOp::init(tbl_lookup_ctdef->get_lookup_scan_ctdef(), + tbl_lookup_rtdef->get_lookup_scan_rtdef(), + vir_scan_ctdef->get_inv_idx_scan_ctdef(), + vir_scan_rtdef->get_inv_idx_scan_rtdef(), + aux_lookup_ctdef->get_lookup_scan_ctdef(), + aux_lookup_rtdef->get_lookup_scan_rtdef(), + tx_desc, + snapshot, + scan_param))) { + LOG_WARN("failed to init domain index lookup op", K(ret)); + } else { + need_scan_aux_ = true; + doc_id_lookup_ctdef_ = aux_lookup_ctdef->get_lookup_scan_ctdef(); + doc_id_lookup_rtdef_ = aux_lookup_rtdef->get_lookup_scan_rtdef(); + doc_id_expr_ = vir_scan_ctdef->inv_scan_vec_id_col_; + vec_eval_ctx_ = vir_scan_rtdef->eval_ctx_; + delta_buf_ctdef_ = vir_scan_ctdef->get_delta_tbl_ctdef(); + delta_buf_rtdef_ = vir_scan_rtdef->get_delta_tbl_rtdef(); + index_id_ctdef_ = vir_scan_ctdef->get_index_id_tbl_ctdef(); + index_id_rtdef_ = vir_scan_rtdef->get_index_id_tbl_rtdef(); + snapshot_ctdef_ = vir_scan_ctdef->get_snapshot_tbl_ctdef(); + snapshot_rtdef_ = vir_scan_rtdef->get_snapshot_tbl_rtdef(); + com_aux_vec_ctdef_ = vir_scan_ctdef->get_com_aux_tbl_ctdef(); + com_aux_vec_rtdef_ = vir_scan_rtdef->get_com_aux_tbl_rtdef(); + set_dim(vir_scan_ctdef->dim_); + if (DAS_OP_SORT == aux_lookup_ctdef->get_doc_id_scan_ctdef()->op_type_) { + sort_ctdef_ = static_cast(aux_lookup_ctdef->get_doc_id_scan_ctdef()); + sort_rtdef_ = static_cast(aux_lookup_rtdef->get_doc_id_scan_rtdef()); + if (OB_FAIL(init_limit(vir_scan_ctdef, vir_scan_rtdef))) { + LOG_WARN("failed to init limit", K(ret), 
KPC(vir_scan_ctdef), KPC(vir_scan_rtdef)); + } else if (nullptr != sort_ctdef_ && OB_FAIL(init_sort(vir_scan_ctdef, vir_scan_rtdef))) { + LOG_WARN("failed to init sort", K(ret), KPC(vir_scan_ctdef), KPC(vir_scan_rtdef)); + } else if (OB_FAIL(set_vec_index_param(vir_scan_ctdef->vec_index_param_))) { + LOG_WARN("failed to set vec index param", K(ret)); + } + } + + if (OB_SUCC(ret)) { + is_inited_ = true; + } + } + } + return ret; +} + +int ObVectorIndexLookupOp::init_limit(const ObDASVecAuxScanCtDef *ir_ctdef, + ObDASVecAuxScanRtDef *ir_rtdef) +{ + int ret = OB_SUCCESS; + if (nullptr != sort_ctdef_ && nullptr != sort_rtdef_) { + // try init top-k limits + bool is_null = false; + if (OB_UNLIKELY((nullptr != sort_ctdef_->limit_expr_ || nullptr != sort_ctdef_->offset_expr_) + && ir_rtdef->get_inv_idx_scan_rtdef()->limit_param_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected top k limit with table scan limit pushdown", K(ret), KPC(ir_ctdef), KPC(ir_rtdef)); + } else if (nullptr != sort_ctdef_->limit_expr_) { + ObDatum *limit_datum = nullptr; + if (OB_FAIL(sort_ctdef_->limit_expr_->eval(*sort_rtdef_->eval_ctx_, limit_datum))) { + LOG_WARN("failed to eval limit expr", K(ret)); + } else if (limit_datum->is_null()) { + is_null = true; + limit_param_.limit_ = 0; + } else { + limit_param_.limit_ = limit_datum->get_int() < 0 ? 0 : limit_datum->get_int(); + } + } + + if (OB_SUCC(ret) && !is_null && nullptr != sort_ctdef_->offset_expr_) { + ObDatum *offset_datum = nullptr; + if (OB_FAIL(sort_ctdef_->offset_expr_->eval(*sort_rtdef_->eval_ctx_, offset_datum))) { + LOG_WARN("failed to eval offset expr", K(ret)); + } else if (offset_datum->is_null()) { + limit_param_.offset_ = 0; + } else { + limit_param_.offset_ = offset_datum->get_int() < 0 ? 
0 : offset_datum->get_int(); + } + } + } else { + // init with table scan pushdown limit + limit_param_ = ir_rtdef->get_inv_idx_scan_rtdef()->limit_param_; + } + + if (OB_SUCC(ret)) { + if (limit_param_.offset_ + limit_param_.limit_ > MAX_VSAG_QUERY_RES_SIZE) { + ret = OB_NOT_SUPPORTED; + LOG_USER_WARN(OB_NOT_SUPPORTED, "query size (limit + offset) is more than 16384"); + } + } + return ret; +} + +int ObVectorIndexLookupOp::init_sort(const ObDASVecAuxScanCtDef *ir_ctdef, + ObDASVecAuxScanRtDef *ir_rtdef) +{ + int ret = OB_SUCCESS; + const int64_t top_k_cnt = limit_param_.is_valid() ? (limit_param_.limit_ + limit_param_.offset_) : INT64_MAX; + if (OB_ISNULL(sort_ctdef_) || OB_ISNULL(sort_rtdef_) || OB_ISNULL(ir_ctdef) || OB_ISNULL(sort_rtdef_->eval_ctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null sort def", K(ret), KPC(sort_rtdef_), KPC(sort_ctdef_), KPC(ir_ctdef)); + } else { + for (int i = 0; i < sort_ctdef_->sort_exprs_.count() && OB_SUCC(ret) && OB_ISNULL(search_vec_); ++i) { + ObExpr *expr = sort_ctdef_->sort_exprs_.at(i); + if (expr->type_ == T_FUN_SYS_L2_DISTANCE) { + if (expr->arg_cnt_ != 2) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected arg num", K(ret), K(expr->arg_cnt_)); + } else if (expr->args_[0]->is_const_expr()) { + search_vec_ = expr->args_[0]; + } else if (expr->args_[1]->is_const_expr()) { + search_vec_ = expr->args_[1]; + } + } + } + } + return ret; +} + +int ObVectorIndexLookupOp::init_base_idx_scan_param(const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + const sql::ObDASScanCtDef *ctdef, + sql::ObDASScanRtDef *rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + ObTableScanParam &scan_param, + bool reverse_order) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(ctdef), KPC(rtdef), K(ls_id), K(tablet_id)); + } else { + uint64_t tenant_id = MTL_ID(); + scan_param.tenant_id_ = 
tenant_id; + scan_param.key_ranges_.set_attr(ObMemAttr(tenant_id, "ScanParamKR")); + scan_param.ss_key_ranges_.set_attr(ObMemAttr(tenant_id, "ScanParamSSKR")); + scan_param.tx_lock_timeout_ = rtdef->tx_lock_timeout_; + scan_param.index_id_ = ctdef->ref_table_id_; + scan_param.is_get_ = false; // scan + scan_param.is_for_foreign_check_ = false; + scan_param.timeout_ = rtdef->timeout_ts_; + scan_param.scan_flag_ = rtdef->scan_flag_; + scan_param.reserved_cell_count_ = ctdef->access_column_ids_.count(); + scan_param.allocator_ = &rtdef->stmt_allocator_; + scan_param.scan_allocator_ = &rtdef->scan_allocator_; + scan_param.sql_mode_ = rtdef->sql_mode_; + scan_param.frozen_version_ = rtdef->frozen_version_; + scan_param.force_refresh_lc_ = rtdef->force_refresh_lc_; + scan_param.output_exprs_ = nullptr; + scan_param.calc_exprs_ = &(ctdef->pd_expr_spec_.calc_exprs_); + scan_param.aggregate_exprs_ = nullptr; + scan_param.table_param_ = &(ctdef->table_param_); + scan_param.op_ = nullptr; + scan_param.row2exprs_projector_ = nullptr; + scan_param.schema_version_ = ctdef->schema_version_; + scan_param.tenant_schema_version_ = rtdef->tenant_schema_version_; + scan_param.limit_param_ = rtdef->limit_param_; + scan_param.need_scn_ = rtdef->need_scn_; + scan_param.pd_storage_flag_ = ctdef->pd_expr_spec_.pd_storage_flag_.pd_flag_; + scan_param.fb_snapshot_ = rtdef->fb_snapshot_; + scan_param.fb_read_tx_uncommitted_ = rtdef->fb_read_tx_uncommitted_; + scan_param.ls_id_ = ls_id; + scan_param.tablet_id_ = tablet_id; + ObQueryFlag query_flag(ObQueryFlag::Forward, // scan_order + false, // daily_merge + false, // optimize + false, // sys scan + true, // full_row + false, // index_back + false, // query_stat + ObQueryFlag::MysqlMode, // sql_mode + false // read_latest + ); + query_flag.scan_order_ = reverse_order ? 
ObQueryFlag::Reverse : ObQueryFlag::Forward; + scan_param.scan_flag_.flag_ = query_flag.flag_; + if (!ctdef->pd_expr_spec_.pushdown_filters_.empty()) { + scan_param.op_filters_ = &ctdef->pd_expr_spec_.pushdown_filters_; + } + scan_param.pd_storage_filters_ = rtdef->p_pd_expr_op_->pd_storage_filters_; + if (OB_NOT_NULL(tx_desc)) { + scan_param.tx_id_ = tx_desc->get_tx_id(); + } else { + scan_param.tx_id_.reset(); + } + + if (OB_NOT_NULL(snapshot)) { + scan_param.snapshot_ = *snapshot; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("null snapshot", K(ret), KP(snapshot)); + } + + if (FAILEDx(scan_param.column_ids_.assign(ctdef->access_column_ids_))) { + LOG_WARN("failed to init column ids", K(ret)); + } + } + return ret; +} + +int ObVectorIndexLookupOp::gen_scan_range(const int64_t col_cnt, common::ObTableID table_id, ObNewRange &scan_range) +{ + int ret = OB_SUCCESS; + int64_t obj_cnt = col_cnt * 2; + ObObj *obj_ptr = nullptr; + void *buf = nullptr; + if (col_cnt <= 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected col_cnt", K(ret)); + } else if (OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret)); + } else if (OB_ISNULL(buf = allocator_->alloc(sizeof(ObObj) * obj_cnt))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory for rowkey obj", K(ret)); + } else if (OB_ISNULL(obj_ptr = new (buf) ObObj[obj_cnt])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret)); + } else { + for (int i = 0; i < col_cnt; ++i) { + obj_ptr[i].set_min_value(); + obj_ptr[i + col_cnt].set_max_value(); + } + scan_range.table_id_ = table_id; + scan_range.start_key_.assign(obj_ptr, col_cnt); + scan_range.end_key_.assign(&obj_ptr[col_cnt], col_cnt); + scan_range.border_flag_.set_inclusive_start(); + scan_range.border_flag_.set_inclusive_end(); + } + return ret; +} + +int ObVectorIndexLookupOp::init_delta_buffer_scan_param() +{ + int ret = OB_SUCCESS; + ObNewRange scan_range; + if 
(OB_FAIL(init_base_idx_scan_param(ls_id_, delta_buf_tablet_id_, delta_buf_ctdef_, + delta_buf_rtdef_,tx_desc_, snapshot_, + delta_buf_scan_param_, true))) { + LOG_WARN("failed to generate init delta buffer scan param", K(ret)); + } else if (OB_FAIL(gen_scan_range(DELTA_BUF_PRI_KEY_CNT, delta_buf_ctdef_->ref_table_id_, scan_range))) { + LOG_WARN("failed to generate init delta buffer scan range", K(ret)); + } else if (OB_FAIL(delta_buf_scan_param_.key_ranges_.push_back(scan_range))) { + LOG_WARN("failed to append scan range", K(ret)); + } + return ret; +} + +int ObVectorIndexLookupOp::init_index_id_scan_param() +{ + int ret = OB_SUCCESS; + ObNewRange scan_range; + if (OB_FAIL(init_base_idx_scan_param(ls_id_, index_id_tablet_id_, index_id_ctdef_, + index_id_rtdef_,tx_desc_, snapshot_, + index_id_scan_param_, true))) { + LOG_WARN("failed to generate init delta buffer scan param", K(ret)); + } else if (OB_FAIL(gen_scan_range(INDEX_ID_PRI_KEY_CNT, index_id_ctdef_->ref_table_id_, scan_range))) { + LOG_WARN("failed to generate init delta buffer scan range", K(ret)); + } else if (OB_FAIL(index_id_scan_param_.key_ranges_.push_back(scan_range))) { + LOG_WARN("failed to append scan range", K(ret)); + } + return ret; +} + +int ObVectorIndexLookupOp::init_snapshot_scan_param() +{ + int ret = OB_SUCCESS; + ObNewRange scan_range; + if (OB_FAIL(init_base_idx_scan_param(ls_id_, snapshot_tablet_id_, snapshot_ctdef_, + snapshot_rtdef_,tx_desc_, snapshot_, + snapshot_scan_param_))) { + LOG_WARN("failed to generate init delta buffer scan param", K(ret)); + } else if (OB_FAIL(gen_scan_range(SNAPSHOT_PRI_KEY_CNT, snapshot_ctdef_->ref_table_id_, scan_range))) { + LOG_WARN("failed to generate init delta buffer scan range", K(ret)); + } else if (OB_FAIL(snapshot_scan_param_.key_ranges_.push_back(scan_range))) { + LOG_WARN("failed to append scan range", K(ret)); + } + return ret; +} + +int ObVectorIndexLookupOp::init_com_aux_vec_scan_param() +{ + INIT_SUCC(ret); + ObNewRange scan_range; + 
int64_t rowkey_cnt = doc_id_lookup_ctdef_->result_output_.count(); + if (nullptr != doc_id_lookup_ctdef_->trans_info_expr_) { + rowkey_cnt = rowkey_cnt - 1; + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(init_base_idx_scan_param(ls_id_, com_aux_vec_tablet_id_, com_aux_vec_ctdef_, + com_aux_vec_rtdef_, tx_desc_, snapshot_, + com_aux_vec_scan_param_))) { + LOG_WARN("failed to generate init vid rowkey scan param", K(ret)); + } + return ret; +} + +int ObVectorIndexLookupOp::reset_lookup_state() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObDomainIndexLookupOp::reset_lookup_state())) { + LOG_WARN("failed to reset lookup state for domain index lookup op", K(ret)); + } else { + if (nullptr != lookup_iter_) { + doc_id_scan_param_.key_ranges_.reuse(); + doc_id_scan_param_.ss_key_ranges_.reuse(); + } + if (nullptr != delta_buf_iter_) { + delta_buf_scan_param_.key_ranges_.reuse(); + delta_buf_scan_param_.ss_key_ranges_.reuse(); + } + } + return ret; +} + +int ObVectorIndexLookupOp::fetch_index_table_rowkey() +{ + int ret = OB_SUCCESS; + ObNewRow *row = nullptr; + if (OB_ISNULL(adaptor_vid_iter_)) { + if (OB_FAIL(process_adaptor_state())) { + LOG_WARN("failed to process_adaptor_state", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(adaptor_vid_iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get adaptor_vid_iter", K(ret)); + } else if (OB_FAIL(adaptor_vid_iter_->get_next_row(row))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next next row from text retrieval iter", K(ret)); + } + } else if (OB_ISNULL(row)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should not be null", K(ret)); + } else if (row->get_count() != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should not be one row", K(row->get_count()), K(ret)); + } else if (OB_FALSE_IT(doc_id_key_obj_ = row->get_cell(0))) { + } else if (OB_FAIL(set_lookup_vid_key())) { + LOG_WARN("failed to set lookup vid query key", K(ret)); + } + return ret; +} + +int 
ObVectorIndexLookupOp::fetch_index_table_rowkeys(int64_t &count, const int64_t capacity) +{ + int ret = OB_SUCCESS; + ObNewRow *row = nullptr; + int64_t index_scan_row_cnt = 0; + if (OB_ISNULL(adaptor_vid_iter_)) { + if (OB_FAIL(process_adaptor_state())) { + LOG_WARN("failed to process_adaptor_state", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(adaptor_vid_iter_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get adaptor_vid_iter", K(ret)); + } else if (OB_FALSE_IT(adaptor_vid_iter_->set_batch_size(capacity))) { + } else if (OB_FAIL(adaptor_vid_iter_->get_next_rows(row, index_scan_row_cnt))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next next row from text retrieval iter", K(ret)); + } else { + ret = OB_SUCCESS; + } + } + if (OB_SUCC(ret) && index_scan_row_cnt > 0) { + if (OB_FAIL(set_lookup_vid_keys(row, index_scan_row_cnt))) { + LOG_WARN("failed to set lookup vid query key", K(ret)); + } else { + count += index_scan_row_cnt; + } + } + return ret; +} + +int ObVectorIndexLookupOp::set_lookup_vid_key() +{ + int ret = OB_SUCCESS; + ObNewRange doc_id_range; + ObRowkey doc_id_rowkey(&doc_id_key_obj_, 1); + uint64_t ref_table_id = doc_id_lookup_ctdef_->ref_table_id_; + if (OB_FAIL(doc_id_range.build_range(ref_table_id, doc_id_rowkey))) { + LOG_WARN("build vid lookup range failed", K(ret)); + } else if (OB_FAIL(doc_id_scan_param_.key_ranges_.push_back(doc_id_range))) { + LOG_WARN("store lookup key range failed", K(ret)); + } else { + LOG_DEBUG("generate vid scan range", K(ret), K(doc_id_range)); + } + return ret; +} + +int ObVectorIndexLookupOp::reuse_scan_iter(bool need_switch_param) +{ + int ret = OB_SUCCESS; + + reset_lookup_state(); + ObITabletScan &tsc_service = get_tsc_service(); + doc_id_scan_param_.need_switch_param_ = need_switch_param; + if (OB_NOT_NULL(adaptor_vid_iter_)) { // maybe only reset vid iter when need need_switch_param ? 
+ adaptor_vid_iter_->reset(); + adaptor_vid_iter_->~ObVectorQueryVidIterator(); + if (nullptr != allocator_) { + allocator_->free(adaptor_vid_iter_); + } + adaptor_vid_iter_ = nullptr; + } + if (OB_NOT_NULL(delta_buf_rtdef_)) { + const ObTabletID &scan_tablet_id = delta_buf_scan_param_.tablet_id_; + delta_buf_scan_param_.need_switch_param_ = scan_tablet_id.is_valid() && scan_tablet_id != delta_buf_tablet_id_; + if (OB_FAIL(tsc_service.reuse_scan_iter(delta_buf_scan_param_.need_switch_param_, delta_buf_iter_))) { + LOG_WARN("failed to reuse scan iter", K(ret)); + } else if (nullptr != rowkey_iter_) { + delta_buf_scan_param_.key_ranges_.reuse(); + delta_buf_scan_param_.ss_key_ranges_.reuse(); + } + } + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(index_id_rtdef_)) { + const ObTabletID &scan_tablet_id = index_id_scan_param_.tablet_id_; + index_id_scan_param_.need_switch_param_ = scan_tablet_id.is_valid() && scan_tablet_id != index_id_tablet_id_; + if (OB_FAIL(tsc_service.reuse_scan_iter(index_id_scan_param_.need_switch_param_, index_id_iter_))) { + LOG_WARN("failed to reuse scan iter", K(ret)); + } else if (nullptr != rowkey_iter_) { + index_id_scan_param_.key_ranges_.reuse(); + index_id_scan_param_.ss_key_ranges_.reuse(); + } + } + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(snapshot_rtdef_)) { + const ObTabletID &scan_tablet_id = snapshot_scan_param_.tablet_id_; + snapshot_scan_param_.need_switch_param_ = scan_tablet_id.is_valid() && scan_tablet_id != snapshot_tablet_id_; + if (OB_FAIL(tsc_service.reuse_scan_iter(snapshot_scan_param_.need_switch_param_, snapshot_iter_))) { + LOG_WARN("failed to reuse scan iter", K(ret)); + } else if (nullptr != rowkey_iter_) { + snapshot_scan_param_.key_ranges_.reuse(); + snapshot_scan_param_.ss_key_ranges_.reuse(); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(com_aux_vec_rtdef_)) { + const ObTabletID &scan_tablet_id = com_aux_vec_scan_param_.tablet_id_; + com_aux_vec_scan_param_.need_switch_param_ = 
scan_tablet_id.is_valid() && scan_tablet_id != com_aux_vec_tablet_id_; + if (OB_FAIL(tsc_service.reuse_scan_iter(com_aux_vec_scan_param_.need_switch_param_, com_aux_vec_iter_))) { + LOG_WARN("failed to reuse scan iter", K(ret)); + } else if (nullptr != rowkey_iter_) { + com_aux_vec_scan_param_.key_ranges_.reuse(); + com_aux_vec_scan_param_.ss_key_ranges_.reuse(); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObDomainIndexLookupOp::reuse_scan_iter())) { + LOG_WARN("failed to reuse scan iter", K(ret)); + } else if (OB_FAIL(tsc_service.reuse_scan_iter(doc_id_scan_param_.need_switch_param_, rowkey_iter_))) { + LOG_WARN("failed to reuse scan iter", K(ret)); + } + return ret; +} + +int ObVectorIndexLookupOp::set_lookup_vid_key(ObRowkey& doc_id_rowkey) +{ + int ret = OB_SUCCESS; + ObNewRange doc_id_range; + uint64_t ref_table_id = doc_id_lookup_ctdef_->ref_table_id_; + if (OB_FAIL(doc_id_range.build_range(ref_table_id, doc_id_rowkey))) { + LOG_WARN("build doc id lookup range failed", K(ret)); + } else if (OB_FAIL(doc_id_scan_param_.key_ranges_.push_back(doc_id_range))) { + LOG_WARN("store lookup key range failed", K(ret)); + } else { + LOG_DEBUG("generate doc id scan range", K(ret), K(doc_id_range)); + } + return ret; +} + +int ObVectorIndexLookupOp::set_lookup_vid_keys(ObNewRow *row, int64_t size) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(row) || row->get_count() <= 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error row key", K(ret)); + } else { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(*vec_eval_ctx_); + batch_info_guard.set_batch_size(size); + for (int64_t i = 0; OB_SUCC(ret) && i < size; ++i) { + batch_info_guard.set_batch_idx(i); + ObRowkey doc_id_rowkey(&(row->get_cell(i)), 1); + if (OB_FAIL(set_lookup_vid_key(doc_id_rowkey))) { + LOG_WARN("failed to set lookup vid key", K(ret)); + } + } + } + return ret; +} + +int ObVectorIndexLookupOp::get_aux_table_rowkeys(const int64_t lookup_row_cnt) +{ + int ret = OB_SUCCESS; + 
doc_id_lookup_rtdef_->p_pd_expr_op_->clear_evaluated_flag(); + int64_t rowkey_cnt = 0; + if (index_end_ && doc_id_scan_param_.key_ranges_.empty()) { + ret = OB_ITER_END; + } else if (OB_FAIL(do_aux_table_lookup())) { + LOG_WARN("failed to do aux table lookup", K(ret)); + } else if (OB_FAIL(rowkey_iter_->get_next_rows(rowkey_cnt, lookup_row_cnt))) { + LOG_WARN("failed to get rowkey by vid", K(ret), K(doc_id_scan_param_.key_ranges_)); + } else if (OB_UNLIKELY(lookup_row_cnt != rowkey_cnt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected aux lookup row count not match", K(ret), K(rowkey_cnt), K(lookup_row_cnt)); + } else { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(*doc_id_lookup_rtdef_->eval_ctx_); + batch_info_guard.set_batch_size(lookup_row_cnt); + for (int64_t i = 0; OB_SUCC(ret) && i < lookup_row_cnt; ++i) { + batch_info_guard.set_batch_idx(i); + if (OB_FAIL(set_main_table_lookup_key())) { + LOG_WARN("failed to set main table lookup key", K(ret)); + } + } + } + return ret; +} + +int ObVectorIndexLookupOp::revert_iter_for_complete_data() +{ + INIT_SUCC(ret); + + ObITabletScan &tsc_service = get_tsc_service(); + if (OB_FAIL(tsc_service.revert_scan_iter(com_aux_vec_iter_))) { + LOG_WARN("revert scan iterator failed", K(ret)); + } else { + com_aux_vec_scan_param_.key_ranges_.reuse(); + com_aux_vec_scan_param_.ss_key_ranges_.reuse(); + } + + com_aux_vec_iter_ = NULL; + com_aux_vec_scan_param_.destroy_schema_guard(); + com_aux_vec_scan_param_.~ObTableScanParam(); + //trans_info_array_.destroy(); + + return ret; +} + +int ObVectorIndexLookupOp::vector_do_index_lookup() +{ + int ret = OB_SUCCESS; + ObITabletScan &tsc_service = get_tsc_service(); + ObNewRowIterator *&storage_iter = com_aux_vec_iter_; + if (com_aux_vec_scan_param_.key_ranges_.empty()) { + //do nothing + } else if (storage_iter == nullptr) { + //first index lookup, init scan param and do table scan + if (OB_FAIL(init_com_aux_vec_scan_param())) { + LOG_WARN("init scan param failed", K(ret)); + } 
else if (OB_FAIL(tsc_service.table_scan(com_aux_vec_scan_param_, + storage_iter))) { + if (OB_SNAPSHOT_DISCARDED == ret && com_aux_vec_scan_param_.fb_snapshot_.is_valid()) { + ret = OB_INVALID_QUERY_TIMESTAMP; + } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) { + LOG_WARN("fail to scan table", K(com_aux_vec_scan_param_), K(ret)); + } + } + } else { + const ObTabletID &storage_tablet_id = com_aux_vec_scan_param_.tablet_id_; + com_aux_vec_scan_param_.need_switch_param_ = + (storage_tablet_id.is_valid() && + storage_tablet_id != com_aux_vec_tablet_id_ ? + true : false); + com_aux_vec_scan_param_.tablet_id_ = tablet_id_; + com_aux_vec_scan_param_.ls_id_ = ls_id_; + ObITabletScan &tsc_service = get_tsc_service(); + if (OB_FAIL(tsc_service.reuse_scan_iter(com_aux_vec_scan_param_.need_switch_param_, com_aux_vec_iter_))) { + LOG_WARN("failed to reuse iter", K(ret)); + } else if (OB_FAIL(tsc_service.table_rescan(com_aux_vec_scan_param_, storage_iter))) { + LOG_WARN("table_rescan scan iter failed", K(ret)); + } + } + return ret; +} + +int ObVectorIndexLookupOp::prepare_state(const ObVidAdaLookupStatus& cur_state, + ObVectorQueryAdaptorResultContext &ada_ctx) +{ + int ret = OB_SUCCESS; + ObITabletScan &tsc_service = get_tsc_service(); + switch(cur_state) { + case ObVidAdaLookupStatus::STATES_INIT: { + if (nullptr == delta_buf_iter_) { + delta_buf_scan_param_.need_switch_param_ = false; + // init doc_id -> rowkey table iterator as rowkey iter + if (OB_FAIL(init_delta_buffer_scan_param())) { + LOG_WARN("failed to init delta buf table lookup scan param", K(ret)); + } else if (OB_FAIL(tsc_service.table_scan(delta_buf_scan_param_, delta_buf_iter_))) { + if (OB_SNAPSHOT_DISCARDED == ret && delta_buf_scan_param_.fb_snapshot_.is_valid()) { + ret = OB_INVALID_QUERY_TIMESTAMP; + } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) { + LOG_WARN("fail to scan table", K(delta_buf_scan_param_), K(ret)); + } + } + } else { + const ObTabletID &scan_tablet_id = delta_buf_scan_param_.tablet_id_; + 
delta_buf_scan_param_.need_switch_param_ = scan_tablet_id.is_valid() && (delta_buf_tablet_id_ != scan_tablet_id); + delta_buf_scan_param_.tablet_id_ = delta_buf_tablet_id_; + delta_buf_scan_param_.ls_id_ = ls_id_; + if (OB_FAIL(tsc_service.reuse_scan_iter(delta_buf_scan_param_.need_switch_param_, delta_buf_iter_))) { + LOG_WARN("failed to reuse delta buf table scan iterator", K(ret)); + } else if (OB_FAIL(tsc_service.table_rescan(delta_buf_scan_param_, delta_buf_iter_))) { + LOG_WARN("failed to rescan delta buf table rowkey table", K(ret), K_(delta_buf_tablet_id), K(scan_tablet_id)); + } + } + break; + } + case ObVidAdaLookupStatus::QUERY_ROWKEY_VEC: { + ObObj *vectors = nullptr; + ObSEArray vector_column_ids; + int64_t dim = ada_ctx.get_dim(); + int64_t res_count = 0; + int64_t count = ada_ctx.get_count(); + + if (OB_ISNULL(vectors = ada_ctx.get_vids())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get vectors.", K(ret)); + } + + for (int i = 0; OB_SUCC(ret) && i < count; i++) { + ObRowkey vid_id_rowkey(&(vectors[i]), 1); + if (OB_FAIL(set_lookup_vid_key(vid_id_rowkey))) { + LOG_WARN("failed to set vid rowkey id.", K(ret)); + } else if (OB_FAIL(get_cmpt_aux_table_rowkey())) { + if (ret != OB_ITER_END) { + LOG_WARN("do aux index lookup failed", K(ret)); + } else { + ret = OB_SUCCESS; + } + } else if (OB_FAIL(vector_do_index_lookup())) { + LOG_WARN("failed to lookup.", K(ret)); + } else { + ObNewRowIterator *storage_iter = com_aux_vec_iter_; + storage::ObTableScanIterator *table_scan_iter = dynamic_cast(storage_iter); + ObString vector; + blocksstable::ObDatumRow *datum_row = nullptr; + if (OB_FAIL(table_scan_iter->get_next_row(datum_row))) { + if (OB_ITER_END != ret) { + LOG_WARN("failed to get next row from next table.", K(ret)); + } else { + ret = OB_SUCCESS; + } + } else if (datum_row->get_column_count() != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get row column cnt invalid.", K(ret), K(datum_row->get_column_count())); + } else if (OB_FALSE_IT(vector = 
datum_row->storage_datums_[0].get_string())) { + LOG_WARN("failed to get vid.", K(ret)); + } else if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(allocator_, + ObLongTextType, + CS_TYPE_BINARY, + com_aux_vec_ctdef_->result_output_.at(0)->obj_meta_.has_lob_header(), + vector))) { + LOG_WARN("failed to get real data.", K(ret)); + } else { + ada_ctx.set_vector(res_count++, vector.ptr(), vector.length()); + doc_id_scan_param_.key_ranges_.reset(); + com_aux_vec_scan_param_.key_ranges_.reset(); + } + } + } + + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObLocalIndexLookupOp::reset_lookup_state())) { + LOG_WARN("failed to reset look up status.", K(ret)); + } else if (OB_FAIL(ObVectorIndexLookupOp::revert_iter_for_complete_data())) { + LOG_WARN("failed to revert vid rowkey iter.", K(ret)); + } + break; + } + + case ObVidAdaLookupStatus::QUERY_INDEX_ID_TBL: { + if (nullptr == index_id_iter_) { + index_id_scan_param_.need_switch_param_ = false; + // init doc_id -> rowkey table iterator as rowkey iter + if (OB_FAIL(init_index_id_scan_param())) { + LOG_WARN("failed to init index id lookup scan param", K(ret)); + } else if (OB_FAIL(tsc_service.table_scan(index_id_scan_param_, index_id_iter_))) { + if (OB_SNAPSHOT_DISCARDED == ret && index_id_scan_param_.fb_snapshot_.is_valid()) { + ret = OB_INVALID_QUERY_TIMESTAMP; + } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) { + LOG_WARN("fail to scan table", K(index_id_scan_param_), K(ret)); + } + } + } else { + const ObTabletID &scan_tablet_id = index_id_scan_param_.tablet_id_; + index_id_scan_param_.need_switch_param_ = scan_tablet_id.is_valid() && (index_id_tablet_id_ != scan_tablet_id); + index_id_scan_param_.tablet_id_ = index_id_tablet_id_; + index_id_scan_param_.ls_id_ = ls_id_; + if (OB_FAIL(tsc_service.reuse_scan_iter(index_id_scan_param_.need_switch_param_, index_id_iter_))) { + LOG_WARN("failed to reuse index id iter iterator", K(ret)); + } else if 
(OB_FAIL(tsc_service.table_rescan(index_id_scan_param_, index_id_iter_))) { + LOG_WARN("failed to rescan index id iter rowkey table", K(ret), K_(index_id_tablet_id), K(scan_tablet_id)); + } + } + break; + } + case ObVidAdaLookupStatus::QUERY_SNAPSHOT_TBL: { + if (nullptr == snapshot_iter_) { + snapshot_scan_param_.need_switch_param_ = false; + // init doc_id -> rowkey table iterator as rowkey iter + if (OB_FAIL(init_snapshot_scan_param())) { + LOG_WARN("failed to init snapshot table lookup scan param", K(ret)); + } else if (OB_FAIL(tsc_service.table_scan(snapshot_scan_param_, snapshot_iter_))) { + if (OB_SNAPSHOT_DISCARDED == ret && snapshot_scan_param_.fb_snapshot_.is_valid()) { + ret = OB_INVALID_QUERY_TIMESTAMP; + } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) { + LOG_WARN("fail to scan table", K(snapshot_scan_param_), K(ret)); + } + } + } else { + const ObTabletID &scan_tablet_id = snapshot_scan_param_.tablet_id_; + snapshot_scan_param_.need_switch_param_ = scan_tablet_id.is_valid() && (snapshot_tablet_id_ != scan_tablet_id); + snapshot_scan_param_.tablet_id_ = snapshot_tablet_id_; + snapshot_scan_param_.ls_id_ = ls_id_; + if (OB_FAIL(tsc_service.reuse_scan_iter(snapshot_scan_param_.need_switch_param_, snapshot_iter_))) { + LOG_WARN("failed to reuse sanpshot iterator", K(ret)); + } else if (OB_FAIL(tsc_service.table_rescan(snapshot_scan_param_, snapshot_iter_))) { + LOG_WARN("failed to rescan snapshot rowkey table", K(ret), K_(snapshot_tablet_id), K(scan_tablet_id)); + } + } + break; + } + case ObVidAdaLookupStatus::STATES_END: { + // do nothing + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected status.", K(ret)); + break; + } + } + return ret; +} +int ObVectorIndexLookupOp::call_pva_interface(const ObVidAdaLookupStatus& cur_state, + ObVectorQueryAdaptorResultContext& ada_ctx, + ObPluginVectorIndexAdaptor &adaptor) +{ + int ret = OB_SUCCESS; + switch(cur_state) { + case ObVidAdaLookupStatus::STATES_INIT: { + if 
(OB_FAIL(adaptor.check_delta_buffer_table_readnext_status(&ada_ctx, delta_buf_iter_, delta_buf_scan_param_.snapshot_.core_.version_))) { + LOG_WARN("fail to check_delta_buffer_table_readnext_status.", K(ret)); + } + break; + } + case ObVidAdaLookupStatus::QUERY_ROWKEY_VEC: { + if (OB_FAIL(adaptor.complete_delta_buffer_table_data(&ada_ctx))) { + LOG_WARN("failed to complete_delta_buffer_table_data.", K(ret)); + } + break; + } + case ObVidAdaLookupStatus::QUERY_INDEX_ID_TBL: { + if (!index_id_scan_param_.snapshot_.is_valid() || !index_id_scan_param_.snapshot_.core_.version_.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get index id scan param invalid.", K(ret)); + } else if (OB_FAIL(adaptor.check_index_id_table_readnext_status(&ada_ctx, index_id_iter_, index_id_scan_param_.snapshot_.core_.version_))) { + LOG_WARN("fail to check_index_id_table_readnext_status.", K(ret)); + } + break; + } + case ObVidAdaLookupStatus::QUERY_SNAPSHOT_TBL: { + if (OB_FAIL(adaptor.check_snapshot_table_wait_status(&ada_ctx))) { + LOG_WARN("fail to check_snapshot_table_wait_status.", K(ret)); + } + break; + } + case ObVidAdaLookupStatus::STATES_END: { + ObVectorQueryConditions query_cond; + if (OB_FAIL(set_vector_query_condition(query_cond))) { + LOG_WARN("fail to set query condition.", K(ret)); + } else if (OB_FAIL(adaptor.query_result(&ada_ctx, &query_cond, adaptor_vid_iter_))) { + LOG_WARN("fail to query result.", K(ret)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected status.", K(ret)); + break; + } + } + return ret; +} + +int ObVectorIndexLookupOp::process_adaptor_state() +{ + int ret = OB_SUCCESS; + bool is_continue = true; + ObPluginVectorIndexService *vec_index_service = MTL(ObPluginVectorIndexService *); + ObVidAdaLookupStatus last_state = ObVidAdaLookupStatus::STATES_ERROR; + ObVidAdaLookupStatus cur_state = ObVidAdaLookupStatus::STATES_INIT; + ObArenaAllocator tmp_allocator("VectorAdaptor", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); // use for 
tmp query and data complement + ObVectorQueryAdaptorResultContext ada_ctx(allocator_, &tmp_allocator); + share::ObVectorIndexAcquireCtx index_ctx; + ObPluginVectorIndexAdapterGuard adaptor_guard; + index_ctx.inc_tablet_id_ = delta_buf_tablet_id_; + index_ctx.vbitmap_tablet_id_ = index_id_tablet_id_; + index_ctx.snapshot_tablet_id_ = snapshot_tablet_id_; + index_ctx.data_tablet_id_ = tablet_id_; + if (OB_ISNULL(delta_buf_ctdef_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should not be null", K(ret)); + } else if (OB_FAIL(vec_index_service->acquire_adapter_guard(ls_id_, index_ctx, adaptor_guard, &vec_index_param_, dim_))) { + LOG_WARN("fail to get ObMockPluginVectorIndexAdapter", K(ret)); + } else { + share::ObPluginVectorIndexAdaptor* adaptor = adaptor_guard.get_adatper(); + if (OB_ISNULL(adaptor)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("shouldn't be null.", K(ret)); + } else { + while (OB_SUCC(ret) && is_continue) { + if (last_state != cur_state && OB_FAIL(prepare_state(cur_state, ada_ctx))) { + LOG_WARN("failed to prepare state", K(ret)); + } else if (OB_FAIL(call_pva_interface(cur_state, ada_ctx, *adaptor))) { + LOG_WARN("failed to call_pva_interface", K(ret)); + } else if (OB_FALSE_IT(last_state = cur_state)) { + } else if (OB_FAIL(next_state(cur_state, ada_ctx, is_continue))) { + LOG_WARN("fail to get next status.", K(cur_state), K(ada_ctx.get_status()), K(ret)); + } + } + } + } + return ret; +} + +int ObVectorIndexLookupOp::next_state(ObVidAdaLookupStatus& cur_state, + ObVectorQueryAdaptorResultContext& ada_ctx, + bool& is_continue) +{ + int ret = OB_SUCCESS; + switch(cur_state) { + case ObVidAdaLookupStatus::STATES_INIT: { + if (ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_WAIT) { + is_continue = true; + } else if (ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_LACK_SCN) { + cur_state = ObVidAdaLookupStatus::QUERY_INDEX_ID_TBL; + is_continue = true; + } else if (ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_OK) { + cur_state = 
ObVidAdaLookupStatus::STATES_END; + is_continue = true; + } else if (ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_INVALID_SCN) { + cur_state = ObVidAdaLookupStatus::STATES_ERROR; + is_continue = false; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected status.", K(ada_ctx.get_status()), K(ret)); + } + break; + } + case ObVidAdaLookupStatus::QUERY_ROWKEY_VEC: { + if (ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_LACK_SCN) { + cur_state = ObVidAdaLookupStatus::QUERY_SNAPSHOT_TBL; + is_continue = true; + } else if (ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_INVALID_SCN) { + cur_state = ObVidAdaLookupStatus::STATES_ERROR; + is_continue = false; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected status.", K(ada_ctx.get_status()), K(ret)); + } + break; + } + case ObVidAdaLookupStatus::QUERY_INDEX_ID_TBL: { + if (ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_WAIT) { + is_continue = true; + } else if (ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_COM_DATA) { + cur_state = ObVidAdaLookupStatus::QUERY_ROWKEY_VEC; + is_continue = true; + } else if (ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_LACK_SCN) { + cur_state = ObVidAdaLookupStatus::QUERY_SNAPSHOT_TBL; + is_continue = true; + } else if (ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_OK) { + cur_state = ObVidAdaLookupStatus::STATES_END; + is_continue = true; + } else if (ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_INVALID_SCN) { + cur_state = ObVidAdaLookupStatus::STATES_ERROR; + is_continue = false; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected status.", K(ada_ctx.get_status()), K(ret)); + } + break; + } + case ObVidAdaLookupStatus::QUERY_SNAPSHOT_TBL: { + if (ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_WAIT) { + is_continue = true; + } else if (ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_OK) { + cur_state = ObVidAdaLookupStatus::STATES_END; + is_continue = true; + } else if 
(ada_ctx.get_status() == PluginVectorQueryResStatus::PVQ_INVALID_SCN) { + cur_state = ObVidAdaLookupStatus::STATES_ERROR; + is_continue = false; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected status.", K(ada_ctx.get_status()), K(ret)); + } + break; + } + case ObVidAdaLookupStatus::STATES_END: { + is_continue = false; + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected status.", K(ada_ctx.get_status()), K(ret)); + is_continue = false; + break; + } + } + return ret; +} +int ObVectorIndexLookupOp::get_aux_table_rowkey() +{ + int ret = OB_SUCCESS; + doc_id_lookup_rtdef_->p_pd_expr_op_->clear_evaluated_flag(); + if (index_end_ && doc_id_scan_param_.key_ranges_.empty()) { + ret = OB_ITER_END; + } else if (OB_FAIL(do_aux_table_lookup())) { + LOG_WARN("failed to do aux table lookup", K(ret)); + } else if (OB_FAIL(rowkey_iter_->get_next_row())) { + LOG_WARN("failed to get rowkey by vid", K(ret)); + } else if (OB_FAIL(set_main_table_lookup_key())) { + LOG_WARN("failed to set main table lookup key", K(ret)); + } + return ret; +} + +int ObVectorIndexLookupOp::get_cmpt_aux_table_rowkey() +{ + int ret = OB_SUCCESS; + doc_id_lookup_rtdef_->p_pd_expr_op_->clear_evaluated_flag(); + if (index_end_ && doc_id_scan_param_.key_ranges_.empty()) { + ret = OB_ITER_END; + } else if (OB_FAIL(do_aux_table_lookup())) { + LOG_WARN("failed to do aux table lookup", K(ret)); + } else if (OB_FAIL(rowkey_iter_->get_next_row())) { + LOG_WARN("failed to get rowkey by vid", K(ret)); + } else if (OB_FAIL(set_com_main_table_lookup_key())) { + LOG_WARN("failed to set main table lookup key", K(ret)); + } + return ret; +} + +int ObVectorIndexLookupOp::set_com_main_table_lookup_key() +{ + int ret = OB_SUCCESS; + int64_t rowkey_cnt = doc_id_lookup_ctdef_->result_output_.count(); + void *buf = nullptr; + ObObj *obj_ptr = nullptr; + common::ObArenaAllocator &lookup_alloc = lookup_memctx_->get_arena_allocator(); + ObNewRange lookup_range; + if (nullptr != 
doc_id_lookup_ctdef_->trans_info_expr_) { + rowkey_cnt = rowkey_cnt - 1; + } + + if (OB_UNLIKELY(rowkey_cnt <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid rowkey cnt", K(ret), KPC(doc_id_lookup_ctdef_)); + } else if (OB_ISNULL(buf = lookup_alloc.alloc(sizeof(ObObj) * rowkey_cnt))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(rowkey_cnt)); + } else { + obj_ptr = new (buf) ObObj[rowkey_cnt]; + } + + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_cnt; ++i) { + ObObj tmp_obj; + ObExpr *expr = doc_id_lookup_ctdef_->result_output_.at(i); + if (T_PSEUDO_GROUP_ID == expr->type_) { + // do nothing + } else { + ObDatum &col_datum = expr->locate_expr_datum(*doc_id_lookup_rtdef_->eval_ctx_); + if (OB_ISNULL(col_datum.ptr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get col datum null", K(ret)); + } else if (OB_FAIL(col_datum.to_obj(tmp_obj, expr->obj_meta_, expr->obj_datum_map_))) { + LOG_WARN("convert datum to obj failed", K(ret)); + } else if (OB_FAIL(ob_write_obj(lookup_alloc, tmp_obj, obj_ptr[i]))) { + LOG_WARN("deep copy rowkey value failed", K(ret), K(tmp_obj)); + } + } + } + + if (OB_SUCC(ret)) { + ObRowkey table_rowkey(obj_ptr, rowkey_cnt); + if (OB_FAIL(lookup_range.build_range(com_aux_vec_ctdef_->ref_table_id_, table_rowkey))) { + LOG_WARN("failed to build lookup range", K(ret), K(table_rowkey)); + } else if (OB_FAIL(com_aux_vec_scan_param_.key_ranges_.push_back(lookup_range))) { + LOG_WARN("store lookup key range failed", K(ret), K(scan_param_)); + } else { + LOG_DEBUG("get rowkey from docid rowkey table", K(ret), K(table_rowkey), K(lookup_range)); + } + } + return ret; +} + +int ObVectorIndexLookupOp::set_main_table_lookup_key() +{ + int ret = OB_SUCCESS; + int64_t rowkey_cnt = doc_id_lookup_ctdef_->result_output_.count(); + void *buf = nullptr; + ObObj *obj_ptr = nullptr; + common::ObArenaAllocator &lookup_alloc = lookup_memctx_->get_arena_allocator(); + ObNewRange lookup_range; + if (nullptr != 
doc_id_lookup_ctdef_->trans_info_expr_) { + rowkey_cnt = rowkey_cnt - 1; + } + + if (OB_UNLIKELY(rowkey_cnt <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid rowkey cnt", K(ret), KPC(doc_id_lookup_ctdef_)); + } else if (OB_ISNULL(buf = lookup_alloc.alloc(sizeof(ObObj) * rowkey_cnt))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(rowkey_cnt)); + } else { + obj_ptr = new (buf) ObObj[rowkey_cnt]; + } + + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_cnt; ++i) { + ObObj tmp_obj; + ObExpr *expr = doc_id_lookup_ctdef_->result_output_.at(i); + if (T_PSEUDO_GROUP_ID == expr->type_) { + // do nothing + } else { + ObDatum &col_datum = expr->locate_expr_datum(*doc_id_lookup_rtdef_->eval_ctx_); + if (OB_FAIL(col_datum.to_obj(tmp_obj, expr->obj_meta_, expr->obj_datum_map_))) { + LOG_WARN("convert datum to obj failed", K(ret)); + } else if (OB_FAIL(ob_write_obj(lookup_alloc, tmp_obj, obj_ptr[i]))) { + LOG_WARN("deep copy rowkey value failed", K(ret), K(tmp_obj)); + } + } + } + + if (OB_SUCC(ret)) { + ObRowkey table_rowkey(obj_ptr, rowkey_cnt); + if (OB_FAIL(lookup_range.build_range(lookup_ctdef_->ref_table_id_, table_rowkey))) { + LOG_WARN("failed to build lookup range", K(ret), K(table_rowkey)); + } else if (OB_FAIL(scan_param_.key_ranges_.push_back(lookup_range))) { + LOG_WARN("store lookup key range failed", K(ret), K(scan_param_)); + } else { + LOG_DEBUG("get rowkey from docid rowkey table", K(ret), K(table_rowkey), K(lookup_range)); + } + } + return ret; +} + +int ObVectorIndexLookupOp::do_aux_table_lookup() +{ + int ret = OB_SUCCESS; + ObITabletScan &tsc_service = get_tsc_service(); + if (nullptr == rowkey_iter_) { + doc_id_scan_param_.need_switch_param_ = false; + // init doc_id -> rowkey table iterator as rowkey iter + if (OB_FAIL(set_doc_id_idx_lookup_param( + doc_id_lookup_ctdef_, doc_id_lookup_rtdef_, doc_id_scan_param_, doc_id_idx_tablet_id_, ls_id_))) { + LOG_WARN("failed to init vid lookup scan param", K(ret)); + } else 
if (OB_FAIL(tsc_service.table_scan(doc_id_scan_param_, rowkey_iter_))) { + if (OB_SNAPSHOT_DISCARDED == ret && scan_param_.fb_snapshot_.is_valid()) { + ret = OB_INVALID_QUERY_TIMESTAMP; + } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) { + LOG_WARN("fail to scan table", K(scan_param_), K(ret)); + } + } + } else { + const ObTabletID &scan_tablet_id = doc_id_scan_param_.tablet_id_; + doc_id_scan_param_.need_switch_param_ = scan_tablet_id.is_valid() && (doc_id_idx_tablet_id_ != scan_tablet_id); + doc_id_scan_param_.tablet_id_ = doc_id_idx_tablet_id_; + doc_id_scan_param_.ls_id_ = ls_id_; + if (OB_FAIL(tsc_service.reuse_scan_iter(doc_id_scan_param_.need_switch_param_, rowkey_iter_))) { + LOG_WARN("failed to reuse vid iterator", K(ret)); + } else if (OB_FAIL(tsc_service.table_rescan(doc_id_scan_param_, rowkey_iter_))) { + LOG_WARN("failed to rescan vid rowkey table", K(ret), K_(doc_id_idx_tablet_id), K(scan_tablet_id)); + } + } + return ret; +} + +void ObVectorIndexLookupOp::do_clear_evaluated_flag() +{ + if OB_NOT_NULL(delta_buf_rtdef_) { + delta_buf_rtdef_->p_pd_expr_op_->clear_evaluated_flag(); + } + if OB_NOT_NULL(index_id_rtdef_) { + index_id_rtdef_->p_pd_expr_op_->clear_evaluated_flag(); + } + if OB_NOT_NULL(snapshot_rtdef_) { + snapshot_rtdef_->p_pd_expr_op_->clear_evaluated_flag(); + } + return ObDomainIndexLookupOp::do_clear_evaluated_flag(); +} + +int ObVectorIndexLookupOp::revert_iter() +{ + int ret = OB_SUCCESS; + ObITabletScan &tsc_service = get_tsc_service(); + if (nullptr != adaptor_vid_iter_) { + adaptor_vid_iter_->reset(); + adaptor_vid_iter_->~ObVectorQueryVidIterator(); + if (nullptr != allocator_) { + allocator_->free(adaptor_vid_iter_); + } + adaptor_vid_iter_ = nullptr; + } + + if (OB_NOT_NULL(delta_buf_rtdef_)) { + delta_buf_scan_param_.need_switch_param_ = false; + delta_buf_scan_param_.destroy_schema_guard(); + delta_buf_scan_param_.~ObTableScanParam(); + } + if (OB_NOT_NULL(index_id_rtdef_)) { + index_id_scan_param_.need_switch_param_ = false; + 
index_id_scan_param_.destroy_schema_guard(); + index_id_scan_param_.~ObTableScanParam(); + } + if (OB_NOT_NULL(snapshot_rtdef_)) { + snapshot_scan_param_.need_switch_param_ = false; + snapshot_scan_param_.destroy_schema_guard(); + snapshot_scan_param_.~ObTableScanParam(); + } + + if (OB_NOT_NULL(com_aux_vec_rtdef_)) { + com_aux_vec_scan_param_.need_switch_param_ = false; + com_aux_vec_scan_param_.destroy_schema_guard(); + com_aux_vec_scan_param_.~ObTableScanParam(); + } + + if (OB_FAIL(tsc_service.revert_scan_iter(delta_buf_iter_))) { + LOG_WARN("revert scan iterator failed", K(ret)); + } else if (OB_FAIL(tsc_service.revert_scan_iter(index_id_iter_))) { + LOG_WARN("revert scan iterator failed", K(ret)); + } else if (OB_FAIL(tsc_service.revert_scan_iter(snapshot_iter_))) { + LOG_WARN("revert scan iterator failed", K(ret)); + } else if (OB_FAIL(tsc_service.revert_scan_iter(aux_lookup_iter_))) { + LOG_WARN("revert index table scan iterator (opened by dasop) failed", K(ret)); + } else { + delta_buf_iter_ = nullptr; + index_id_iter_ = nullptr; + snapshot_iter_ = nullptr; + aux_lookup_iter_ = nullptr; + if (OB_FAIL(ObDomainIndexLookupOp::revert_iter())) { + LOG_WARN("failed to revert local index lookup op iter", K(ret)); + } + } + return ret; +} + +int ObVectorIndexLookupOp::set_vector_query_condition(ObVectorQueryConditions &query_cond) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(search_vec_) || OB_ISNULL(sort_rtdef_) || OB_ISNULL(sort_rtdef_->eval_ctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null.", K(ret)); + } else { + query_cond.query_limit_ = limit_param_.limit_ + limit_param_.offset_; + query_cond.query_order_ = true; + query_cond.row_iter_ = snapshot_iter_; + query_cond.query_scn_ = snapshot_scan_param_.snapshot_.core_.version_; + ObSQLSessionInfo *session = nullptr; + uint64_t ob_hnsw_ef_search = 0; + ObDatum *vec_datum = NULL; + if (OB_FALSE_IT(session = sort_rtdef_->eval_ctx_->exec_ctx_.get_my_session())) { + } else if (OB_ISNULL(session)) { + ret 
= OB_ERR_UNEXPECTED; + LOG_WARN("fail to get table schema", K(ret), KPC(session)); + } else if (OB_FAIL(session->get_ob_hnsw_ef_search(ob_hnsw_ef_search))) { + LOG_WARN("fail to get ob_hnsw_ef_search", K(ret)); + } else if (OB_FALSE_IT(query_cond.ef_search_ = ob_hnsw_ef_search)) { + } else if (OB_UNLIKELY(OB_FAIL(search_vec_->eval(*(sort_rtdef_->eval_ctx_), vec_datum)))) { + LOG_WARN("eval vec arg failed", K(ret)); + } else if (OB_FALSE_IT(query_cond.query_vector_ = vec_datum->get_string())) { + } else if (OB_FAIL(sql::ObTextStringHelper::read_real_string_data(allocator_, + ObLongTextType, + CS_TYPE_BINARY, + search_vec_->obj_meta_.has_lob_header(), + query_cond.query_vector_))) { + LOG_WARN("failed to get real data.", K(ret)); + } + } + return ret; +} + + +} // namespace sql +} // namespace oceanbase diff --git a/src/sql/das/ob_vector_index_lookup_op.h b/src/sql/das/ob_vector_index_lookup_op.h new file mode 100644 index 0000000000..607204585d --- /dev/null +++ b/src/sql/das/ob_vector_index_lookup_op.h @@ -0,0 +1,182 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ +#ifndef OBDEV_SRC_SQL_DAS_OB_VECTOR_INDEX_LOOKUP_OP_H_ +#define OBDEV_SRC_SQL_DAS_OB_VECTOR_INDEX_LOOKUP_OP_H_ +#include "sql/das/ob_domain_index_lookup_op.h" +#include "src/share/vector_index/ob_plugin_vector_index_service.h" +namespace oceanbase +{ +namespace sql +{ + +enum ObVidAdaLookupStatus +{ + STATES_INIT, + QUERY_INDEX_ID_TBL, + QUERY_SNAPSHOT_TBL, + QUERY_ROWKEY_VEC, + STATES_END, + STATES_ERROR +}; + +class ObVectorIndexLookupOp : public ObDomainIndexLookupOp +{ +public: + ObVectorIndexLookupOp(ObIAllocator &allocator) + : ObDomainIndexLookupOp(allocator), + aux_lookup_iter_(nullptr), + adaptor_vid_iter_(nullptr), + search_vec_(nullptr), + delta_buf_tablet_id_(ObTabletID::INVALID_TABLET_ID), + index_id_tablet_id_(ObTabletID::INVALID_TABLET_ID), + snapshot_tablet_id_(ObTabletID::INVALID_TABLET_ID), + delta_buf_scan_param_(), + index_id_scan_param_(), + snapshot_scan_param_(), + com_aux_vec_scan_param_(), + delta_buf_iter_(nullptr), + index_id_iter_(nullptr), + snapshot_iter_(nullptr), + com_aux_vec_iter_(nullptr), + delta_buf_ctdef_(nullptr), + delta_buf_rtdef_(nullptr), + index_id_ctdef_(nullptr), + index_id_rtdef_(nullptr), + snapshot_ctdef_(nullptr), + snapshot_rtdef_(nullptr), + com_aux_vec_ctdef_(nullptr), + com_aux_vec_rtdef_(nullptr), + vec_eval_ctx_(nullptr), + limit_param_(), + sort_ctdef_(nullptr), + sort_rtdef_(nullptr), + is_inited_(false), + vec_index_param_(), + dim_(0) {} + virtual ~ObVectorIndexLookupOp() {}; + int init(const ObDASBaseCtDef *table_lookup_ctdef, + ObDASBaseRtDef *table_lookup_rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + storage::ObTableScanParam &scan_param); // init for OP + virtual int reset_lookup_state() override; + virtual int do_aux_table_lookup(); + virtual int revert_iter() override; + virtual void do_clear_evaluated_flag() override; + int reuse_scan_iter(bool need_switch_param); + void set_aux_lookup_iter(common::ObNewRowIterator *aux_lookup_iter) + { + 
aux_lookup_iter_ = aux_lookup_iter; + } + void set_aux_table_id(ObTabletID delta_buf_tablet_id, + ObTabletID index_id_tablet_id, + ObTabletID snapshot_tablet_id, + ObTabletID com_aux_vec_tablet_id) + { + delta_buf_tablet_id_ = delta_buf_tablet_id; + index_id_tablet_id_ = index_id_tablet_id; + snapshot_tablet_id_ = snapshot_tablet_id; + com_aux_vec_tablet_id_ = com_aux_vec_tablet_id; + } + void set_sort_ctdef(const ObDASSortCtDef *sort_ctdef) { sort_ctdef_ = sort_ctdef;} + void set_sort_rtdef(ObDASSortRtDef *sort_rtdef) { sort_rtdef_ = sort_rtdef;} + int init_sort(const ObDASVecAuxScanCtDef *ir_ctdef, + ObDASVecAuxScanRtDef *ir_rtdef); + int init_limit(const ObDASVecAuxScanCtDef *ir_ctdef, + ObDASVecAuxScanRtDef *ir_rtdef); + int revert_iter_for_complete_data(); + int prepare_state(const ObVidAdaLookupStatus& cur_state, ObVectorQueryAdaptorResultContext &ada_ctx); + int vector_do_index_lookup(); + int get_cmpt_aux_table_rowkey(); + int do_vid_rowkey_table_scan(); + int set_com_main_table_lookup_key(); + void set_dim(int64_t dim) {dim_ = dim;} + int set_vec_index_param(ObString vec_index_param) { return ob_write_string(*allocator_, vec_index_param, vec_index_param_); } +protected: + virtual int fetch_index_table_rowkey() override; + virtual int fetch_index_table_rowkeys(int64_t &count, const int64_t capacity) override; + virtual int get_aux_table_rowkey() override; + virtual int get_aux_table_rowkeys(const int64_t lookup_row_cnt) override; +private: + int init_delta_buffer_scan_param(); + int init_index_id_scan_param(); + int init_snapshot_scan_param(); + int init_vid_rowkey_scan_param(); + int init_com_aux_vec_scan_param(); + int process_adaptor_state(); + int prepare_state(const ObVidAdaLookupStatus& cur_state); + int call_pva_interface(const ObVidAdaLookupStatus& cur_state, + ObVectorQueryAdaptorResultContext& ada_ctx, + ObPluginVectorIndexAdaptor &adaptor); + int next_state(ObVidAdaLookupStatus& cur_states, + ObVectorQueryAdaptorResultContext& ada_ctx, + bool& 
is_continue); + int set_lookup_vid_key(); + int set_lookup_vid_key(ObRowkey& doc_id_rowkey); + int set_lookup_vid_keys(ObNewRow *row, int64_t count); + int set_main_table_lookup_key(); + static int init_base_idx_scan_param(const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + const sql::ObDASScanCtDef *ctdef, + sql::ObDASScanRtDef *rtdef, + transaction::ObTxDesc *tx_desc, + transaction::ObTxReadSnapshot *snapshot, + ObTableScanParam &scan_param, + bool reverse_order = false); + int gen_scan_range(const int64_t obj_cnt, common::ObTableID table_id, ObNewRange &scan_range); + int set_vector_query_condition(ObVectorQueryConditions &query_cond); +private: + static const int64_t DELTA_BUF_PRI_KEY_CNT = 2; + static const int64_t INDEX_ID_PRI_KEY_CNT = 3; + static const int64_t SNAPSHOT_PRI_KEY_CNT = 1; + static const uint64_t MAX_VSAG_QUERY_RES_SIZE = 16384; +private: + common::ObNewRowIterator *aux_lookup_iter_; + ObVectorQueryVidIterator* adaptor_vid_iter_; + ObExpr* search_vec_; + ObTabletID delta_buf_tablet_id_; + ObTabletID index_id_tablet_id_; + ObTabletID snapshot_tablet_id_; + ObTabletID com_aux_vec_tablet_id_; + // scan param for the delta buffer table + ObTableScanParam delta_buf_scan_param_; + // scan param for the index id table + ObTableScanParam index_id_scan_param_; + // scan param for the snapshot table + ObTableScanParam snapshot_scan_param_; + // scan param for the aux vector table (com_aux_vec) + ObTableScanParam com_aux_vec_scan_param_; + common::ObNewRowIterator *delta_buf_iter_; + common::ObNewRowIterator *index_id_iter_; + common::ObNewRowIterator *snapshot_iter_; + common::ObNewRowIterator *com_aux_vec_iter_; + const ObDASScanCtDef *delta_buf_ctdef_; + ObDASScanRtDef *delta_buf_rtdef_; + const ObDASScanCtDef *index_id_ctdef_; + ObDASScanRtDef *index_id_rtdef_; + const ObDASScanCtDef *snapshot_ctdef_; + ObDASScanRtDef *snapshot_rtdef_; + const ObDASScanCtDef *com_aux_vec_ctdef_; + ObDASScanRtDef *com_aux_vec_rtdef_; + ObEvalCtx *vec_eval_ctx_; + common::ObLimitParam limit_param_; + const ObDASSortCtDef 
*sort_ctdef_; + ObDASSortRtDef *sort_rtdef_; + // init + bool is_inited_; + ObString vec_index_param_; + int64_t dim_; +}; + +} // namespace sql +} // namespace oceanbase +#endif /* OBDEV_SRC_SQL_DAS_OB_VECTOR_INDEX_LOOKUP_OP_H_ */ diff --git a/src/sql/engine/aggregate/ob_aggregate_processor.cpp b/src/sql/engine/aggregate/ob_aggregate_processor.cpp index dc28739ebc..5c876b6ecc 100644 --- a/src/sql/engine/aggregate/ob_aggregate_processor.cpp +++ b/src/sql/engine/aggregate/ob_aggregate_processor.cpp @@ -19,6 +19,7 @@ #include "sql/engine/expr/ob_expr_less_than.h" #include "sql/engine/expr/ob_expr_div.h" #include "sql/engine/expr/ob_expr_result_type_util.h" +#include "sql/engine/expr/ob_array_expr_utils.h" #include "sql/engine/ob_exec_context.h" #include "sql/engine/expr/ob_expr_estimate_ndv.h" #include "sql/engine/user_defined_function/ob_udf_util.h" @@ -5446,7 +5447,8 @@ int ObAggregateProcessor::prepare_add_calc( break; } case ObFloatTC: - case ObDoubleTC: { + case ObDoubleTC: + case ObCollectionSQLTC: { ret = clone_aggr_cell(aggr_cell, first_value, false); break; } @@ -5626,6 +5628,14 @@ int ObAggregateProcessor::add_calc( } break; } + case ObCollectionSQLTC: { + if (result_datum.is_null()) { + ret = clone_aggr_cell(aggr_cell, iter_value, false); + } else if (OB_FAIL(ObArrayExprUtils::vector_datum_add(result_datum, iter_value, aggr_alloc_))) { + LOG_WARN("failed to add vector", K(ret)); + } + break; + } default: { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected type", K(column_tc), K(ret)); @@ -5790,6 +5800,14 @@ int ObAggregateProcessor::sub_calc( } break; } + case ObCollectionSQLTC: { + if (result_datum.is_null()) { + ret = clone_aggr_cell(aggr_cell, iter_value, false); + } else if (OB_FAIL(ObArrayExprUtils::vector_datum_add(result_datum, iter_value, aggr_alloc_, true /*negative*/))) { + LOG_WARN("failed to sub vector", K(ret)); + } + break; + } default: { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected type", K(column_tc), K(ret)); @@ -6325,6 +6343,21 @@ int 
ObAggregateProcessor::add_calc_batch( } break; } + case ObCollectionSQLTC: { + uint16_t i = 0; // row num in a batch + for (uint16_t it = selector.begin(); OB_SUCC(ret) && it < selector.end(); selector.next(it)) { + i = selector.get_batch_index(it); + if (src.at(i)->is_null()) { + continue; + } + if (result_datum.is_null()) { + ret = clone_aggr_cell(aggr_cell, *src.at(i), false); + } else if (OB_FAIL(ObArrayExprUtils::vector_datum_add(result_datum, *src.at(i), aggr_alloc_))) { + LOG_WARN("failed to add vector", K(ret)); + } + } + break; + } default: { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected type", K(column_tc), K(ret)); diff --git a/src/sql/engine/basic/ob_compact_row.cpp b/src/sql/engine/basic/ob_compact_row.cpp index 7cb18e9f09..f7cdbd7292 100644 --- a/src/sql/engine/basic/ob_compact_row.cpp +++ b/src/sql/engine/basic/ob_compact_row.cpp @@ -17,6 +17,7 @@ #include "share/vector/ob_uniform_vector.h" #include "share/vector/ob_discrete_vector.h" #include "share/vector/ob_fixed_length_vector.h" +#include "sql/engine/expr/ob_array_expr_utils.h" namespace oceanbase { @@ -210,6 +211,13 @@ int ObCompactRow::calc_row_size(const RowMeta &row_meta, const common::ObIArray< SQL_ENG_LOG(WARN, "fail to evel vector", K(ret), K(expr)); } else if (reordered && row_meta.project_idx(col_idx) < row_meta.fixed_cnt_) { // continue, the size is computed in `fixed_size` + } else if (expr->is_nested_expr() && !is_uniform_format(expr->get_format(ctx))) { + int64_t len = 0; + if (OB_FAIL(ObArrayExprUtils::calc_nested_expr_data_size(*expr, ctx, row_idx, len))) { + SQL_ENG_LOG(WARN, "fail to calc nested expr data size", K(ret)); + } else { + size += len; + } } else { ObIVector *vec = expr->get_vector(ctx); const VectorFormat format = vec->get_format(); @@ -393,5 +401,60 @@ int64_t ToStrCompactRow::to_string(char *buf, const int64_t buf_len) const return pos; } +int ObCompactRow::nested_vec_to_row(const ObExpr &expr, ObEvalCtx &ctx, const sql::RowMeta &row_meta, + sql::ObCompactRow 
*stored_row, const uint64_t row_idx, const int64_t col_idx) +{ + int ret = OB_SUCCESS; + ObIVector *root_vec = expr.get_vector(ctx); + if (root_vec->is_null(row_idx)) { + stored_row->set_null(row_meta, col_idx); + } else { + int64_t offset = stored_row->offset(row_meta, col_idx); + char *row_buf = stored_row->payload(); + int64_t cell_len = 0; + if (OB_FAIL(ObArrayExprUtils::nested_expr_to_row(expr, ctx, row_buf, offset, row_idx, cell_len))) { + LOG_WARN("nested expr to row failed", K(ret)); + } else { + stored_row->update_var_offset(row_meta, col_idx, cell_len); + } + } + return ret; +} + +int ObCompactRow::nested_vec_to_row(const ObExpr &expr, ObEvalCtx &ctx, const sql::RowMeta &row_meta, + sql::ObCompactRow *stored_row, const uint64_t row_idx, const int64_t col_idx, + const int64_t remain_size, int64_t &row_size) +{ + int ret = OB_SUCCESS; + ObIVector *root_vec = expr.get_vector(ctx); + if (root_vec->is_null(row_idx)) { + stored_row->set_null(row_meta, col_idx); + } else { + int64_t offset = stored_row->offset(row_meta, col_idx); + char *row_buf = stored_row->payload(); + int64_t cell_len = 0; + if (OB_FAIL(ObArrayExprUtils::nested_expr_to_row(expr, ctx, row_buf, offset, row_idx, cell_len, &remain_size))) { + LOG_WARN("nested expr to row failed", K(ret)); + } else { + stored_row->update_var_offset(row_meta, col_idx, cell_len); + row_size += cell_len; + } + } + return ret; +} + +int ObCompactRow::nested_vec_to_rows(const ObExpr &expr, ObEvalCtx &ctx, const RowMeta &row_meta, + ObCompactRow **stored_rows, const uint16_t selector[], const int64_t size, const int64_t col_idx) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; i < size && OB_SUCC(ret); i++) { + int64_t row_idx = selector[i]; + if (OB_FAIL(nested_vec_to_row(expr, ctx, row_meta, stored_rows[i], row_idx, col_idx))) { + LOG_WARN("nested expr to row failed", K(ret), K(row_idx), K(size), K(col_idx)); + } + } + return ret; +} + } // end namespace sql } // end namespace oceanbase diff --git 
a/src/sql/engine/basic/ob_compact_row.h b/src/sql/engine/basic/ob_compact_row.h index e4dcaf2b82..5680329a9f 100644 --- a/src/sql/engine/basic/ob_compact_row.h +++ b/src/sql/engine/basic/ob_compact_row.h @@ -251,6 +251,53 @@ struct ObCompactRow MEMCPY(payload_ + offset, payload, len); } + // only for nested expr + inline void append_cell_payload(const RowMeta &meta, + const int64_t col_idx, + const char *payload, + const ObLength len, + int64_t &pos) { + int64_t off = 0; + if (meta.fixed_expr_reordered()) { + const int32_t idx = meta.project_idx(col_idx); + if (idx < meta.fixed_cnt_) { + off = meta.fixed_offsets_[idx]; + } else { + int32_t *var_offset_arr = var_offsets(meta); + int64_t var_idx = idx - meta.fixed_cnt_; + off = meta.var_data_off_ + var_offset_arr[var_idx]; + var_offset_arr[var_idx + 1] = var_offset_arr[var_idx] + len; + } + } else { + int32_t *var_offset_arr = var_offsets(meta); + off = meta.var_data_off_ + var_offset_arr[col_idx]; + var_offset_arr[col_idx + 1] = var_offset_arr[col_idx] + len; + } + off += pos; + MEMCPY(payload_ + off, payload, len); + pos += len; + } + inline void update_var_offset(const RowMeta &meta, + const int64_t col_idx, + const ObLength len) { + int64_t off = 0; + if (meta.fixed_expr_reordered()) { + const int32_t idx = meta.project_idx(col_idx); + if (idx < meta.fixed_cnt_) { + off = meta.fixed_offsets_[idx]; + } else { + int32_t *var_offset_arr = var_offsets(meta); + int64_t var_idx = idx - meta.fixed_cnt_; + off = meta.var_data_off_ + var_offset_arr[var_idx]; + var_offset_arr[var_idx + 1] = var_offset_arr[var_idx] + len; + } + } else { + int32_t *var_offset_arr = var_offsets(meta); + off = meta.var_data_off_ + var_offset_arr[col_idx]; + var_offset_arr[col_idx + 1] = var_offset_arr[col_idx] + len; + } + } + inline void get_cell_payload(const RowMeta &meta, const int64_t col_idx, const char *&payload, @@ -360,6 +407,13 @@ struct ObCompactRow const ObBatchRows &brs, ObEvalCtx &ctx, int64_t &size); + static int 
nested_vec_to_row(const ObExpr &expr, ObEvalCtx &ctx, const RowMeta &row_meta, + ObCompactRow *stored_row, const uint64_t row_idx, const int64_t col_idx); + static int nested_vec_to_row(const ObExpr &expr, ObEvalCtx &ctx, const RowMeta &row_meta, + ObCompactRow *stored_row, const uint64_t row_idx, const int64_t col_idx, + const int64_t remain_size, int64_t &row_size); + static int nested_vec_to_rows(const ObExpr &expr, ObEvalCtx &ctx, const RowMeta &row_meta, + ObCompactRow **stored_rows, const uint16_t selector[], const int64_t size, const int64_t col_idx); TO_STRING_KV(K_(header)) protected: RowHeader header_; @@ -429,8 +483,13 @@ public: compact_row_->init(row_meta); compact_row_->set_row_size(static_cast(row_size)); for (int64_t col_idx = 0; col_idx < exprs.count() && OB_SUCC(ret); ++col_idx) { - ObIVector *vec = exprs.at(col_idx)->get_vector(ctx); - vec->to_row(row_meta, compact_row_, ctx.get_batch_idx(), col_idx); + if (exprs.at(col_idx)->is_nested_expr() + && !is_uniform_format(exprs.at(col_idx)->get_format(ctx))) { + ret = ObCompactRow::nested_vec_to_row(*exprs.at(col_idx), ctx, row_meta, compact_row_, ctx.get_batch_idx(), col_idx); // Check row_meta; ret is kept so the OB_SUCC(ret) loop guard and the final return see a failure + } else { + ObIVector *vec = exprs.at(col_idx)->get_vector(ctx); + vec->to_row(row_meta, compact_row_, ctx.get_batch_idx(), col_idx); + } } } return ret; diff --git a/src/sql/engine/basic/ob_expr_values_op.cpp b/src/sql/engine/basic/ob_expr_values_op.cpp index 67d5941be7..cf3fabb5a8 100644 --- a/src/sql/engine/basic/ob_expr_values_op.cpp +++ b/src/sql/engine/basic/ob_expr_values_op.cpp @@ -526,6 +526,9 @@ OB_INLINE int ObExprValuesOp::calc_next_row() && ObDatumCast::need_scale_decimalint(src_meta.scale_, src_meta.precision_, dst_expr->datum_meta_.scale_, dst_expr->datum_meta_.precision_)); + bool need_cast_collection_element = + (src_meta.type_ == ObCollectionSQLType && dst_expr->datum_meta_.type_ == ObCollectionSQLType + && src_expr->obj_meta_.get_subschema_id() != dst_expr->obj_meta_.get_subschema_id()); if 
(OB_FAIL(ret)) { // do nothing } else if (src_expr == dst_expr) { @@ -542,7 +545,8 @@ OB_INLINE int ObExprValuesOp::calc_next_row() } else if (src_meta.type_ == dst_expr->datum_meta_.type_ && src_meta.cs_type_ == dst_expr->datum_meta_.cs_type_ && src_obj_meta.has_lob_header() == dst_expr->obj_meta_.has_lob_header() - && !need_adjust_decimal_int) { + && !need_adjust_decimal_int + && !need_cast_collection_element) { // 将values中数据copy到output中 if (OB_FAIL(src_expr->eval(eval_ctx_, datum))) { // catch err and print log later diff --git a/src/sql/engine/basic/ob_temp_column_store.cpp b/src/sql/engine/basic/ob_temp_column_store.cpp index 56b934df4f..8deead2751 100644 --- a/src/sql/engine/basic/ob_temp_column_store.cpp +++ b/src/sql/engine/basic/ob_temp_column_store.cpp @@ -19,6 +19,7 @@ #include "share/vector/ob_uniform_vector.h" #include "share/vector/ob_discrete_vector.h" #include "share/ob_define.h" +#include "sql/engine/expr/ob_array_expr_utils.h" namespace oceanbase { @@ -27,7 +28,93 @@ using namespace common; namespace sql { -int ObTempColumnStore::ColumnBlock::calc_rows_size(const IVectorPtrs &vectors, +int ObTempColumnStore::ColumnBlock::calc_nested_size(ObExpr &expr, ObEvalCtx &ctx, const uint16_t *selector, + const ObArray &lengths, const int64_t size, + int64_t &batch_mem_size) +{ + int ret = OB_SUCCESS; + ObIVector *vec = expr.get_vector(ctx); + const VectorFormat format = vec->get_format(); + if (is_uniform_format(format) && OB_FAIL(distribute_uniform_nested_batch(expr, ctx, selector, format, size))) { + SQL_LOG(WARN, "Failed to add batch nested attrs", K(ret), K(format), K(size)); + } + for (uint32_t i = 0; i < expr.attrs_cnt_ && OB_SUCC(ret); ++i) { + ObIVector *vec = expr.attrs_[i]->get_vector(ctx); + const VectorFormat format = vec->get_format(); + switch (format) { + case VEC_FIXED: + batch_mem_size += calc_size(static_cast(vec), selector, size); + break; + case VEC_DISCRETE: + batch_mem_size += calc_size(static_cast(vec), selector, size); + break; + 
case VEC_CONTINUOUS: + batch_mem_size += calc_size(static_cast(vec), selector, size); + break; + case VEC_UNIFORM: + batch_mem_size += calc_size(static_cast(vec), selector, size, UNFIXED_LENGTH); + break; + case VEC_UNIFORM_CONST: + batch_mem_size += calc_size(static_cast(vec), selector, size, UNFIXED_LENGTH); + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected vector format", K(ret), K(size)); + } + } + return ret; +} + +int ObTempColumnStore::ColumnBlock::distribute_uniform_nested_batch(ObExpr &expr, ObEvalCtx &ctx, const uint16_t *selector, + const VectorFormat format, const int64_t size) +{ + int ret = OB_SUCCESS; + for (uint32_t i = 0; i < expr.attrs_cnt_ && OB_SUCC(ret); ++i) { + if (OB_FAIL(expr.attrs_[i]->init_vector(ctx, i == 0 ? VEC_FIXED : format, size))) { + SQL_LOG(WARN, "Failed to init vector", K(ret), K(i), K(format), K(size)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObArrayExprUtils::batch_dispatch_array_attrs(ctx, expr, 0, size, selector))) { + SQL_LOG(WARN, "Failed to dispatch nested attrs", K(ret), K(format), K(size)); + } + return ret; +} + +int ObTempColumnStore::ColumnBlock::add_nested_batch(ObExpr &expr, ObEvalCtx &ctx, const uint16_t *selector, + const int64_t size, char *head, int64_t &pos) +{ + int ret = OB_SUCCESS; + for (uint32_t i = 0; i < expr.attrs_cnt_ && OB_SUCC(ret); ++i) { + ObIVector *vec = expr.attrs_[i]->get_vector(ctx); + const VectorFormat format = vec->get_format(); + switch (format) { + case VEC_FIXED: + ret = to_buf(static_cast(vec), selector, size, head, pos); + break; + case VEC_DISCRETE: + ret = to_buf(static_cast(vec), selector, size, head, pos); + break; + case VEC_CONTINUOUS: + ret = to_buf(static_cast(vec), selector, size, head, pos); + break; + case VEC_UNIFORM: + ret = to_buf(static_cast(vec), selector, size, UNFIXED_LENGTH, head, pos); + break; + case VEC_UNIFORM_CONST: + ret = to_buf(static_cast(vec), selector, size, UNFIXED_LENGTH, head, pos); + break; + default: + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("unexpected vector format", K(ret), K(format)); + } + } + return ret; +} + +int ObTempColumnStore::ColumnBlock::calc_rows_size(ObEvalCtx &ctx, + const ObExprPtrIArray &exprs, + const IVectorPtrs &vectors, const uint16_t *selector, const ObArray &lengths, const int64_t size, @@ -38,32 +125,40 @@ int ObTempColumnStore::ColumnBlock::calc_rows_size(const IVectorPtrs &vectors, for (int64_t i = 0; OB_SUCC(ret) && i < vectors.count(); ++i) { const ObIVector *vec = vectors.at(i); const VectorFormat format = vec->get_format(); - switch (format) { - case VEC_FIXED: - batch_mem_size += calc_size(static_cast(vec), selector, size); - break; - case VEC_DISCRETE: - batch_mem_size += calc_size(static_cast(vec), selector, size); - break; - case VEC_CONTINUOUS: - batch_mem_size += calc_size(static_cast(vec), selector, size); - break; - case VEC_UNIFORM: - batch_mem_size += calc_size(static_cast(vec), - selector, size, lengths[i]); - break; - case VEC_UNIFORM_CONST: - batch_mem_size += calc_size(static_cast(vec), - selector, size, lengths[i]); - break; - default: - ret = OB_ERR_UNEXPECTED; + if (exprs.at(i)->is_nested_expr()) { + if (OB_FAIL(ColumnBlock::calc_nested_size(*exprs.at(i), ctx, selector, lengths, size, batch_mem_size))) { + LOG_WARN("calc nested expr size failed", K(ret), K(size)); + } + } else { + switch (format) { + case VEC_FIXED: + batch_mem_size += calc_size(static_cast(vec), selector, size); + break; + case VEC_DISCRETE: + batch_mem_size += calc_size(static_cast(vec), selector, size); + break; + case VEC_CONTINUOUS: + batch_mem_size += calc_size(static_cast(vec), selector, size); + break; + case VEC_UNIFORM: + batch_mem_size += calc_size(static_cast(vec), + selector, size, lengths[i]); + break; + case VEC_UNIFORM_CONST: + batch_mem_size += calc_size(static_cast(vec), + selector, size, lengths[i]); + break; + default: + ret = OB_ERR_UNEXPECTED; + } } } return ret; } -int ObTempColumnStore::ColumnBlock::add_batch(ShrinkBuffer &buf, +int 
ObTempColumnStore::ColumnBlock::add_batch(ObEvalCtx &ctx, + const ObExprPtrIArray &exprs, + ShrinkBuffer &buf, const IVectorPtrs &vectors, const uint16_t *selector, const ObArray &lengths, @@ -83,26 +178,32 @@ int ObTempColumnStore::ColumnBlock::add_batch(ShrinkBuffer &buf, const ObIVector *vec = vectors.at(i); const VectorFormat format = vec->get_format(); vec_offsets[i] = pos; - switch (format) { - case VEC_FIXED: - ret = to_buf(static_cast(vec), selector, size, head, pos); - break; - case VEC_DISCRETE: - ret = to_buf(static_cast(vec), selector, size, head, pos); - break; - case VEC_CONTINUOUS: - ret = to_buf(static_cast(vec), selector, size, head, pos); - break; - case VEC_UNIFORM: - ret = to_buf(static_cast(vec), selector, size, lengths[i], - head, pos); - break; - case VEC_UNIFORM_CONST: - ret = to_buf(static_cast(vec), selector, size, lengths[i], - head, pos); - break; - default: - ret = OB_ERR_UNEXPECTED; + if (exprs.at(i)->is_nested_expr()) { + if (OB_FAIL(ColumnBlock::add_nested_batch(*exprs.at(i), ctx, selector, size, head, pos))) { + LOG_WARN("add nested expr batch failed", K(ret), K(size)); + } + } else { + switch (format) { + case VEC_FIXED: + ret = to_buf(static_cast(vec), selector, size, head, pos); + break; + case VEC_DISCRETE: + ret = to_buf(static_cast(vec), selector, size, head, pos); + break; + case VEC_CONTINUOUS: + ret = to_buf(static_cast(vec), selector, size, head, pos); + break; + case VEC_UNIFORM: + ret = to_buf(static_cast(vec), selector, size, lengths[i], + head, pos); + break; + case VEC_UNIFORM_CONST: + ret = to_buf(static_cast(vec), selector, size, lengths[i], + head, pos); + break; + default: + ret = OB_ERR_UNEXPECTED; + } } } vec_offsets[vectors.count()] = pos; // last offset, the size of vector @@ -118,7 +219,33 @@ int ObTempColumnStore::ColumnBlock::add_batch(ShrinkBuffer &buf, return ret; } -int ObTempColumnStore::ColumnBlock::get_next_batch(const IVectorPtrs &vectors, +int ObTempColumnStore::ColumnBlock::get_nested_batch(ObExpr 
&expr, ObEvalCtx &ctx, char *buf, int64_t &pos, const int64_t size) const +{ + int ret = OB_SUCCESS; + for (uint32_t i = 0; i < expr.attrs_cnt_ && OB_SUCC(ret); ++i) { + ObIVector *vec = expr.attrs_[i]->get_vector(ctx); + const VectorFormat format = vec->get_format(); + switch (format) { + case VEC_FIXED: + ret = from_buf(buf, pos, size, static_cast(vec)); + break; + case VEC_CONTINUOUS: + ret = from_buf(buf, pos, size, static_cast(vec)); + break; + case VEC_UNIFORM: + static_cast(vec)->set_all_null(size); + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected vector format", K(ret), K(format), K(i), K(expr.attrs_cnt_)); + } + } + + return ret; +} + +int ObTempColumnStore::ColumnBlock::get_next_batch(const ObExprPtrIArray &exprs, + ObEvalCtx &ctx,const IVectorPtrs &vectors, const ObArray &lengths, const int32_t start_read_pos, int32_t &batch_rows, @@ -130,10 +257,14 @@ int ObTempColumnStore::ColumnBlock::get_next_batch(const IVectorPtrs &vectors, const int32_t *vec_offsets = reinterpret_cast(buf + sizeof(int32_t)); for (int64_t i = 0; OB_SUCC(ret) && i < vectors.count(); ++i) { ObIVector *vec = vectors.at(i); + int64_t pos = vec_offsets[i]; if (NULL == vec || (VEC_UNIFORM_CONST == vec->get_format())) { // if vector is null or uniform const, skip read vector + } else if (exprs.at(i)->is_nested_expr()) { + if (OB_FAIL(ColumnBlock::get_nested_batch(*exprs.at(i), ctx, buf, pos, size))) { + LOG_WARN("get nested expr batch failed", K(ret), K(size)); + } } else { - int64_t pos = vec_offsets[i]; const VectorFormat format = vec->get_format(); switch (format) { case VEC_FIXED: @@ -187,7 +318,7 @@ int ObTempColumnStore::Iterator::get_next_batch(const ObExprPtrIArray &exprs, if (OB_FAIL(ret)) { } else if (OB_FAIL(ensure_read_vectors(exprs, ctx, max_rows))) { LOG_WARN("fail to ensure read vectors", K(ret)); - } else if (OB_FAIL(cur_blk_->get_next_batch(*vectors_, column_store_->batch_ctx_->lengths_, + } else if (OB_FAIL(cur_blk_->get_next_batch(exprs, ctx, 
*vectors_, column_store_->batch_ctx_->lengths_, read_pos_, batch_rows, batch_pos))) { LOG_WARN("fail to get next batch from column block", K(ret)); } else if (OB_UNLIKELY(has_rest_row_in_batch())) { @@ -418,7 +549,7 @@ int ObTempColumnStore::add_batch(const common::ObIArray &exprs, ObEval } int64_t batch_mem_size = 0; if (OB_FAIL(ret)) { - } else if (OB_FAIL(ColumnBlock::calc_rows_size(batch_ctx_->vectors_, + } else if (OB_FAIL(ColumnBlock::calc_rows_size(ctx, exprs, batch_ctx_->vectors_, selector, batch_ctx_->lengths_, size, @@ -426,8 +557,8 @@ int ObTempColumnStore::add_batch(const common::ObIArray &exprs, ObEval LOG_WARN("fail to calc rows size", K(ret)); } else if (OB_FAIL(ensure_write_blk(batch_mem_size))) { LOG_WARN("ensure write block failed", K(ret)); - } else if (OB_FAIL(cur_blk_->add_batch(blk_buf_, batch_ctx_->vectors_, selector, - batch_ctx_->lengths_, size, batch_mem_size))) { + } else if (OB_FAIL(cur_blk_->add_batch(ctx, exprs, blk_buf_, batch_ctx_->vectors_, selector, batch_ctx_->lengths_, + size, batch_mem_size))) { LOG_WARN("fail to add batch to column store", K(ret)); } else { block_id_cnt_ += size; diff --git a/src/sql/engine/basic/ob_temp_column_store.h b/src/sql/engine/basic/ob_temp_column_store.h index c111c19c3e..44a9ef554a 100644 --- a/src/sql/engine/basic/ob_temp_column_store.h +++ b/src/sql/engine/basic/ob_temp_column_store.h @@ -55,23 +55,37 @@ public: */ struct ColumnBlock : public Block { - static int calc_rows_size(const IVectorPtrs &vectors, + static int calc_rows_size(ObEvalCtx &ctx, + const ObExprPtrIArray &exprs, + const IVectorPtrs &vectors, const uint16_t *selector, const ObArray &lengths, const int64_t size, int64_t &batch_mem_size); - int add_batch(ShrinkBuffer &buf, + static int calc_nested_size(ObExpr &expr, ObEvalCtx &ctx, const uint16_t *selector, + const ObArray &lengths, const int64_t size, + int64_t &batch_mem_size); + int add_nested_batch(ObExpr &expr, ObEvalCtx &ctx, const uint16_t *selector, + const int64_t size, char 
*head, int64_t &pos); + static int distribute_uniform_nested_batch(ObExpr &expr, ObEvalCtx &ctx, const uint16_t *selector, + const VectorFormat format, const int64_t size); + int add_batch(ObEvalCtx &ctx, + const ObExprPtrIArray &exprs, + ShrinkBuffer &buf, const IVectorPtrs &vectors, const uint16_t *selector, const ObArray &lengths, const int64_t size, const int64_t batch_mem_size); - int get_next_batch(const IVectorPtrs &vectors, + int get_next_batch(const ObExprPtrIArray &exprs, + ObEvalCtx &ctx, + const IVectorPtrs &vectors, const ObArray &lengths, const int32_t start_read_pos, int32_t &batch_rows, int32_t &batch_pos) const; + int get_nested_batch(ObExpr &expr, ObEvalCtx &ctx, char *buf, int64_t &pos, const int64_t size) const; private: inline static int64_t get_header_size(const int64_t vec_cnt) { diff --git a/src/sql/engine/basic/ob_temp_row_store.cpp b/src/sql/engine/basic/ob_temp_row_store.cpp index 107eb6ab97..308c2ef723 100644 --- a/src/sql/engine/basic/ob_temp_row_store.cpp +++ b/src/sql/engine/basic/ob_temp_row_store.cpp @@ -19,6 +19,7 @@ #include "share/vector/ob_uniform_vector.h" #include "share/vector/ob_discrete_vector.h" #include "share/ob_define.h" +#include "sql/engine/expr/ob_array_expr_utils.h" namespace oceanbase { @@ -67,7 +68,12 @@ int ObTempRowStoreBase::RowBlock::add_row( stored_row->set_null(row_meta, i); } else { ObIVector *vec = expr->get_vector(ctx); - if (OB_FAIL(vec->to_row(row_meta, stored_row, batch_idx, i, + if (expr->is_nested_expr() && !is_uniform_format(vec->get_format())) { + if (OB_FAIL(ObCompactRow::nested_vec_to_row(*expr, ctx, row_meta, stored_row, batch_idx, + i, remain_size - row_size, row_size))) { + LOG_WARN("failed to add row", K(ret)); + } + } else if (OB_FAIL(vec->to_row(row_meta, stored_row, batch_idx, i, remain_size - row_size, expr->is_fixed_length_data_, row_size))) { if (OB_BUF_NOT_ENOUGH != ret) { @@ -151,6 +157,57 @@ int32_t ObTempRowStoreBase::RowBlock::get_row_location(const int64_t row_i - (row_id - 
block_id_ + 1) * ROW_INDEX_SIZE); } +template +int ObTempRowStoreBase::RowBlock::add_batch_inner(ObEvalCtx &ctx, ShrinkBuffer &buf, const BatchCtx &batch_ctx, + const RowMeta &row_meta, + const int64_t size, + int64_t batch_mem_size, + ObCompactRow **stored_rows) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(batch_mem_size > buf.remain())) { + ret = OB_BUF_NOT_ENOUGH; + } else { + memset(buf.head(), 0, batch_mem_size); + for (int64_t i = 0; i < size; i++) { + stored_rows[i] = reinterpret_cast (buf.head()); + stored_rows[i]->set_row_size(batch_ctx.row_size_array_[i]); + ret = post_add_row(buf, batch_ctx.row_size_array_[i]); + } + for (int64_t col_idx = 0; OB_SUCC(ret) && col_idx < batch_ctx.vectors_.count(); col_idx ++) { + if (nullptr == batch_ctx.vectors_.at(col_idx)) { + bool found = false; + for (int64_t j = 0; j < batch_ctx.nested_col_id_.count() && !found; ++j) { + if (batch_ctx.nested_col_id_.at(j) == col_idx) { + found = true; + ObExpr *expr = batch_ctx.nested_exprs_.at(j); + ObIVector *vec = expr->get_vector(ctx); + // nested expr to rows + if (!is_uniform_format(vec->get_format())) { + if (OB_FAIL(ObCompactRow::nested_vec_to_rows(*expr, ctx, row_meta, stored_rows, + batch_ctx.selector_, size, col_idx))) { + LOG_WARN("failed to do nested expr to rows", K(ret)); + } + } else { + vec->to_rows(row_meta, stored_rows, batch_ctx.selector_, size, col_idx); + } + } + } + if (!found) { + ret = vector_to_nulls(row_meta, stored_rows, batch_ctx.selector_, size, col_idx); + } + } else { + batch_ctx.vectors_.at(col_idx)->to_rows(row_meta, stored_rows, + batch_ctx.selector_, size, col_idx); + } + } + if (OB_SUCC(ret)) { + cnt_ += size; + } + } + return ret; +} + template int ObTempRowStoreBase::RowBlock::get_next_batch(ObTempRowStoreBase::ReaderBase &iter, const int64_t max_rows, @@ -358,6 +415,48 @@ int ObTempRowStoreBase::DtlRowBlock::calc_rows_size(const IVectorPtrs &vecto return ret; } +template +int ObTempRowStoreBase::RowBlock::calc_rows_size_inner(const RowMeta 
&row_meta, ObEvalCtx &ctx, + const int64_t size, BatchCtx &batch_ctx) +{ + int ret = OB_SUCCESS; + const int64_t fixed_row_size = row_meta.get_row_fixed_size(); + const bool reordered = row_meta.fixed_expr_reordered(); + for (int64_t i = 0; i < size; i++) { + batch_ctx.row_size_array_[i] = fixed_row_size; + } + for (int64_t col_idx = 0; OB_SUCC(ret) && col_idx < batch_ctx.vectors_.count(); col_idx++) { + ObIVector *vec = batch_ctx.vectors_.at(col_idx); + if (reordered && row_meta.project_idx(col_idx) < row_meta.fixed_cnt_) { + continue; + } else if (nullptr == vec) { + bool found = false; + for (int64_t j = 0; j < batch_ctx.nested_col_id_.count() && !found; ++j) { + if (batch_ctx.nested_col_id_.at(j) == col_idx) { + found = true; + ObExpr *expr = batch_ctx.nested_exprs_.at(j); + ObIVector *nest_vec = expr->get_vector(ctx); + if (!is_uniform_format(nest_vec->get_format())) { + if (OB_FAIL(ObTempRowStoreHelper::calc_nested_expr_batch_data_size(*expr, ctx, batch_ctx.selector_, size, batch_ctx.row_size_array_))) { + LOG_WARN("failed to get nested expr batch data size", K(ret)); + } + } else { + ObTempRowStoreHelper::calc_rows_size(nest_vec, batch_ctx.selector_, size, batch_ctx.row_size_array_); + } + } + } + if (!found) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get length", K(ret)); + } + } else { + ObTempRowStoreHelper::calc_rows_size(vec, batch_ctx.selector_, size, batch_ctx.row_size_array_); + } + } + + return ret; +} + template int ObTempRowStoreBase::ReaderBase::init(ObTempRowStoreBase *store) { @@ -505,6 +604,13 @@ int ObTempRowStoreBase::Iterator::attach_rows(const ObExprPtrIArray &expr for (int64_t col_idx = 0; OB_SUCC(ret) && col_idx < exprs.count(); col_idx ++) { if (OB_FAIL(exprs.at(col_idx)->init_vector_default(ctx, read_rows))) { LOG_WARN("fail to init vector", K(ret)); + } else if (exprs.at(col_idx)->is_nested_expr() + && !is_uniform_format(exprs.at(col_idx)->get_format(ctx))) { + if 
(OB_FAIL(ObArrayExprUtils::nested_expr_from_rows(*exprs.at(col_idx), ctx, row_meta, srows, read_rows, col_idx))) { + LOG_WARN("fail to do nested expr from rows", K(ret)); + } else { + exprs.at(col_idx)->set_evaluated_projected(ctx); + } } else { ObIVector *vec = exprs.at(col_idx)->get_vector(ctx); if (VEC_UNIFORM_CONST != vec->get_format()) { @@ -664,7 +770,27 @@ int ObTempRowStoreBase::init(const RowMeta &row_meta, } template -int ObTempRowStoreBase::init_batch_ctx() +int ObTempRowStoreBase::init_batch_nested_ctx(const ObExprPtrIArray *exprs) +{ + int ret = OB_SUCCESS; + uint32_t nested_cnt = 0; + for (int64_t i = 0; exprs != NULL && i < exprs->count(); i++) { + if (exprs->at(i)->is_nested_expr()) { + nested_cnt++; + } + } + if (nested_cnt > 0) { + if (OB_FAIL(batch_ctx_->nested_exprs_.prepare_allocate(nested_cnt))) { + LOG_WARN("init nested exprs array failed", K(ret), K(nested_cnt)); + } else if (OB_FAIL(batch_ctx_->nested_col_id_.prepare_allocate(nested_cnt))) { + LOG_WARN("init nested col id array failed", K(ret), K(nested_cnt)); + } + } + return ret; +} + +template +int ObTempRowStoreBase::init_batch_ctx(const ObExprPtrIArray *exprs) { int ret = OB_SUCCESS; const int64_t max_batch_size = max_batch_size_; @@ -686,6 +812,9 @@ int ObTempRowStoreBase::init_batch_ctx() batch_ctx_->vectors_.set_attr(mem_attr_); ret = batch_ctx_->vectors_.prepare_allocate(col_cnt_); batch_ctx_->max_batch_size_ = max_batch_size; + if (OB_SUCC(ret) && OB_FAIL(init_batch_nested_ctx(exprs))) { + LOG_WARN("init batch nested ctx failed", K(ret), K(size), K(col_cnt_), K(max_batch_size)); + } if (OB_SUCC(ret)) { mem += sizeof(*batch_ctx_); #define SET_BATCH_CTX_FIELD(X, N) \ @@ -731,12 +860,21 @@ int ObTempRowStoreBase::add_batch(const common::ObIArray &exprs, O ObIVector *vec = NULL; if (OB_FAIL(e->eval_vector(ctx, brs))) { LOG_WARN("evaluate batch failed", K(ret)); + } else if (e->is_nested_expr()) { + batch_ctx_->nested_exprs_.push_back(e); + batch_ctx_->nested_col_id_.push_back(i); } 
else { vec = e->get_vector(ctx); batch_ctx_->vectors_.at(i) = vec; } } - OZ (add_batch(batch_ctx_->vectors_, batch_ctx_->selector_, size, stored_rows)); + if (OB_FAIL(ret)) { + } else if (OB_FAIL(RowBlock::calc_rows_size_inner(row_meta_, ctx, size, + *batch_ctx_))) { + LOG_WARN("fail to calc rows size", K(ret)); + } else if (OB_FAIL(add_batch_inner(ctx, size, stored_rows))) { + LOG_WARN("fail to add batch inner", K(ret)); + } } if (OB_SUCC(ret)) { stored_rows_count = size; @@ -822,11 +960,7 @@ int ObTempRowStoreBase::try_add_batch(const common::ObIArray &e if (OB_FAIL(ret)) { } else if (OB_UNLIKELY(0 == batch_size)) { // no rows, do nothing - } else if (OB_FAIL(RowBlock::calc_rows_size(batch_ctx_->vectors_, - row_meta_, - batch_ctx_->selector_, - batch_size, - batch_ctx_->row_size_array_))) { + } else if (OB_FAIL(RowBlock::calc_rows_size_inner(row_meta_, *ctx, batch_size, *batch_ctx_))) { LOG_WARN("fail to calc rows size", K(ret)); } else { for (int64_t i = 0; i < batch_size; i++) { @@ -838,10 +972,7 @@ int ObTempRowStoreBase::try_add_batch(const common::ObIArray &e batch_added = false; } else { int64_t count = 0; - if (OB_FAIL(add_batch(batch_ctx_->vectors_, - batch_ctx_->selector_, - batch_size, - batch_ctx_->rows_))) { + if (OB_FAIL(add_batch_inner(*ctx, batch_size, batch_ctx_->rows_))) { LOG_WARN("failed to add batch", K(ret)); } else { batch_added = true; @@ -970,6 +1101,82 @@ int ObTempRowStoreBase::add_batch(const IVectorPtrs &vectors, return ret; } +template +int ObTempRowStoreBase::add_batch_inner(ObEvalCtx &ctx, const int64_t size, ObCompactRow **stored_rows) +{ + int ret = OB_SUCCESS; + int64_t batch_mem_size = 0; + if (OB_UNLIKELY(0 == size)) { + // no rows, do nothing + } else { + ObCompactRow **rows = (NULL == stored_rows) ? batch_ctx_->rows_ : stored_rows; + for (int64_t i = 0; i < size; i++) { + batch_mem_size += batch_ctx_->row_size_array_[i]; + } + batch_mem_size += size * (RA ? 
ROW_INDEX_SIZE : 0); + if (OB_FAIL(ensure_write_blk(batch_mem_size))) { + LOG_WARN("ensure write block failed", K(ret)); + } else if (OB_FAIL(cur_blk()->add_batch_inner(ctx, blk_buf_, *batch_ctx_, row_meta_, size, batch_mem_size, rows))) { + LOG_WARN("fail to add batch", K(ret)); + } else { + block_id_cnt_ += size; + inc_mem_used(batch_mem_size); + } + } + return ret; +} + +void ObTempRowStoreHelper::calc_rows_size(ObIVector *vec, const uint16_t selector[], const int64_t size, uint32_t row_size_arr[]) +{ + VectorFormat format = vec->get_format(); + if (VEC_DISCRETE == format) { + ObDiscreteBase *disc_vec = static_cast(vec); + ObLength *lens = disc_vec->get_lens(); + for (int64_t i = 0; i < size; i++) { + if (!disc_vec->is_null(selector[i])) { + row_size_arr[i] += lens[selector[i]]; + } + } + } else if (VEC_CONTINUOUS == format) { + ObContinuousBase *cont_vec = static_cast(vec); + uint32_t *offsets = cont_vec->get_offsets(); + for (int64_t i = 0; i < size; i++) { + row_size_arr[i] += offsets[selector[i] + 1] - offsets[selector[i]]; + } + } else if (is_uniform_format(format)) { + ObUniformBase *uni_vec = static_cast(vec); + ObDatum *datums = uni_vec->get_datums(); + const uint16_t idx_mask = VEC_UNIFORM_CONST == format ? 
0 : UINT16_MAX; + for (int64_t i = 0; i < size; i++) { + if (!datums[selector[i] & idx_mask].is_null()) { + row_size_arr[i] += datums[selector[i] & idx_mask].len_; + } + } + } else if (VEC_FIXED == format) { + ObFixedLengthBase *fixed_vec = static_cast(vec); + for (int64_t i = 0; i < size; i++) { + row_size_arr[i] += fixed_vec->get_length(); + } + } +} + +int ObTempRowStoreHelper::calc_nested_expr_batch_data_size(const ObExpr &expr, ObEvalCtx &ctx, + const uint16_t selector[], const int64_t size, + uint32_t row_size_arr[]) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; i < size && OB_SUCC(ret); i++) { + int64_t len = 0; + if (OB_FAIL(ObArrayExprUtils::calc_nested_expr_data_size(expr, ctx, selector[i], len))) { + SQL_ENG_LOG(WARN, "fail to calc nested expr data size", K(ret)); + } else { + row_size_arr[i] += len; + } + } + return ret; +} + + template int ObTempRowStoreBase::RowBlock::vector_to_nulls(const sql::RowMeta &row_meta, sql::ObCompactRow **stored_rows, diff --git a/src/sql/engine/basic/ob_temp_row_store.h b/src/sql/engine/basic/ob_temp_row_store.h index b3bcebb922..2893c24278 100644 --- a/src/sql/engine/basic/ob_temp_row_store.h +++ b/src/sql/engine/basic/ob_temp_row_store.h @@ -65,6 +65,7 @@ public: * During random reading, use get_buffer() to find the end of indexes, then get the position by * index, and finally obtain the compact row based on the position. 
*/ + struct BatchCtx; struct RowBlock : public Block { int add_row(ShrinkBuffer &buf, @@ -91,6 +92,8 @@ public: const int64_t size, uint32_t row_size_arr[], const common::ObIArray *dup_length = nullptr); + static int calc_rows_size_inner(const RowMeta &row_meta, ObEvalCtx &ctx, + const int64_t size, BatchCtx &batch_ctx); static int calc_row_size(const common::ObIArray &exprs, const RowMeta &row_meta, ObEvalCtx &ctx, @@ -98,6 +101,8 @@ public: int32_t rows() const { return cnt_; } int get_store_row(int64_t &cur_pos, const ObCompactRow *&sr); int get_row(const int64_t row_id, const ObCompactRow *&sr) const; + int add_batch_inner(ObEvalCtx &ctx, ShrinkBuffer &buf, const BatchCtx &batch_ctx, const RowMeta &row_meta, + const int64_t size, int64_t batch_mem_size, ObCompactRow **stored_rows); private: static int vector_to_nulls(const sql::RowMeta &row_meta, sql::ObCompactRow **stored_rows, @@ -219,12 +224,16 @@ public: rows_ = nullptr; row_size_array_ = nullptr; selector_ = nullptr; + nested_exprs_.reset(); + nested_col_id_.reset(); } ObArray vectors_; ObCompactRow **rows_; uint32_t *row_size_array_; int64_t max_batch_size_; uint16_t *selector_; + ObArray nested_exprs_; + ObArray nested_col_id_; }; public: @@ -255,7 +264,8 @@ public: const common::ObCompressorType compressor_type, const bool enable_trunc = false); - int init_batch_ctx(); + int init_batch_ctx(const ObExprPtrIArray *exprs = NULL); + int init_batch_nested_ctx(const ObExprPtrIArray *exprs); int begin(Iterator &it) { @@ -346,6 +356,9 @@ private: return reinterpret_cast(blk_); } +private : + int add_batch_inner(ObEvalCtx &ctx, const int64_t size, ObCompactRow **stored_rows); + private: lib::ObMemAttr mem_attr_; int64_t col_cnt_; @@ -457,6 +470,16 @@ private: int64_t part_cnt_; }; +class ObTempRowStoreHelper +{ +public: + static void calc_rows_size(ObIVector *vec, const uint16_t selector[], + const int64_t size, uint32_t row_size_arr[]); + static int calc_nested_expr_batch_data_size(const ObExpr &expr, ObEvalCtx 
&ctx, + const uint16_t selector[], const int64_t size, + uint32_t row_size_arr[]); +}; + } // end namespace sql } // end namespace oceanbase diff --git a/src/sql/engine/basic/ob_vector_result_holder.cpp b/src/sql/engine/basic/ob_vector_result_holder.cpp index c290a0a9ce..ae43538437 100644 --- a/src/sql/engine/basic/ob_vector_result_holder.cpp +++ b/src/sql/engine/basic/ob_vector_result_holder.cpp @@ -284,6 +284,21 @@ int ObVectorsResultHolder::init(const common::ObIArray &exprs, ObEvalC } else { for (int64_t i = 0; i < exprs.count(); ++i) { new (&backup_cols_[i]) ObColResultHolder(eval_ctx.max_batch_size_, exprs.at(i)); + if (exprs.at(i)->is_nested_expr() && !is_uniform_format(exprs_->at(i)->get_format(*eval_ctx_))) { + backup_cols_[i].expr_attrs_ = exprs.at(i)->attrs_; + backup_cols_[i].attrs_cnt_ = exprs.at(i)->attrs_cnt_; + if (OB_ISNULL(backup_cols_[i].attrs_res_ = static_cast + (eval_ctx.exec_ctx_.get_allocator().alloc(sizeof(ObColResultHolder) + * backup_cols_[i].attrs_cnt_)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc ptrs", K(ret)); + } else { + for (uint32_t j = 0; j < backup_cols_[i].attrs_cnt_; ++j) { + new (&backup_cols_[i].attrs_res_[j]) ObColResultHolder(eval_ctx.max_batch_size_, + backup_cols_[i].expr_attrs_[j]); + } + } + } } } inited_ = true; @@ -291,6 +306,95 @@ int ObVectorsResultHolder::init(const common::ObIArray &exprs, ObEvalC return ret; } +int ObVectorsResultHolder::ObColResultHolder::save_nested(ObIAllocator &alloc, const int64_t batch_size, + ObEvalCtx *eval_ctx) +{ + int ret = OB_SUCCESS; + for (uint32_t i = 0; OB_SUCC(ret) && i < attrs_cnt_; ++i) { + if (OB_FAIL(attrs_res_[i].header_.assign(expr_attrs_[i]->get_vector_header(*eval_ctx)))) { + LOG_WARN("failed to assign vector", K(ret)); + } else if (OB_FAIL(attrs_res_[i].save(alloc, batch_size, eval_ctx))) { + LOG_WARN("failed to backup col", K(ret), K(i)); + } + } + return ret; +} + +int ObVectorsResultHolder::ObColResultHolder::save(ObIAllocator &alloc, const 
int64_t batch_size, + ObEvalCtx *eval_ctx) +{ + int ret = OB_SUCCESS; + VectorFormat format = header_.format_; + switch (format) { + case VEC_FIXED: + OZ(copy_fixed_base( + static_cast(*expr_->get_vector(*eval_ctx)), alloc, batch_size, *eval_ctx)); + break; + case VEC_DISCRETE: + OZ(copy_discrete_base( + static_cast(*expr_->get_vector(*eval_ctx)), alloc, batch_size, *eval_ctx)); + break; + case VEC_CONTINUOUS: + OZ(copy_continuous_base( + static_cast(*expr_->get_vector(*eval_ctx)), alloc, batch_size, *eval_ctx)); + break; + case VEC_UNIFORM: + OZ(copy_uniform_base(expr_, static_cast(*expr_->get_vector(*eval_ctx)), + false, *eval_ctx, alloc, batch_size)); + break; + case VEC_UNIFORM_CONST: + OZ(copy_uniform_base(expr_, static_cast(*expr_->get_vector(*eval_ctx)), + true, *eval_ctx, alloc, batch_size)); + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get wrong vector format", K(format), K(ret)); + break; + } + return ret; +} + +int ObVectorsResultHolder::ObColResultHolder::restore_nested(const int64_t saved_size, ObEvalCtx *eval_ctx) +{ + int ret = OB_SUCCESS; + for (uint32_t i = 0; OB_SUCC(ret) && i < attrs_cnt_; ++i) { + if (OB_FAIL(attrs_res_[i].header_.assign(expr_attrs_[i]->get_vector_header(*eval_ctx)))) { + LOG_WARN("failed to assign vector", K(ret)); + } else if (OB_FAIL(attrs_res_[i].restore(saved_size, eval_ctx))) { + LOG_WARN("failed to backup col", K(ret), K(i)); + } + } + return ret; +} + +int ObVectorsResultHolder::ObColResultHolder::restore(const int64_t saved_size, ObEvalCtx *eval_ctx) +{ + int ret = OB_SUCCESS; + VectorFormat format = header_.format_; + switch (format) { + case VEC_FIXED: + restore_fixed_base(static_cast(*expr_->get_vector(*eval_ctx)), saved_size, *eval_ctx); + break; + case VEC_DISCRETE: + restore_discrete_base(static_cast(*expr_->get_vector(*eval_ctx)), saved_size, *eval_ctx); + break; + case VEC_CONTINUOUS: + restore_continuous_base(static_cast(*expr_->get_vector(*eval_ctx)), saved_size, *eval_ctx); + break; + case 
VEC_UNIFORM: + restore_uniform_base(expr_, static_cast(*expr_->get_vector(*eval_ctx)), false, *eval_ctx, saved_size); + break; + case VEC_UNIFORM_CONST: + restore_uniform_base(expr_, static_cast(*expr_->get_vector(*eval_ctx)), true, *eval_ctx, saved_size); + break; + default: + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get wrong vector format", K(format), K(ret)); + break; + } + return ret; +} + void ObVectorsResultHolder::destroy() { if (tmp_alloc_ != nullptr && exprs_ != nullptr && backup_cols_ != nullptr) { @@ -315,39 +419,11 @@ int ObVectorsResultHolder::save(const int64_t batch_size) for (int64_t i = 0; OB_SUCC(ret) && i < exprs_->count(); ++i) { if (OB_FAIL(backup_cols_[i].header_.assign(exprs_->at(i)->get_vector_header(*eval_ctx_)))) { LOG_WARN("failed to assign vector", K(ret)); - } else { - VectorFormat format = backup_cols_[i].header_.format_; - switch (format) { - case VEC_FIXED: - OZ (backup_cols_[i].copy_fixed_base(static_cast - (*exprs_->at(i)->get_vector(*eval_ctx_)), - alloc, batch_size, *eval_ctx_)); - break; - case VEC_DISCRETE: - OZ (backup_cols_[i].copy_discrete_base(static_cast - (*exprs_->at(i)->get_vector(*eval_ctx_)), - alloc, batch_size, *eval_ctx_)); - break; - case VEC_CONTINUOUS: - OZ (backup_cols_[i].copy_continuous_base(static_cast - (*exprs_->at(i)->get_vector(*eval_ctx_)), - alloc, batch_size, *eval_ctx_)); - break; - case VEC_UNIFORM: - OZ (backup_cols_[i].copy_uniform_base(exprs_->at(i), static_cast - (*exprs_->at(i)->get_vector(*eval_ctx_)), false, - *eval_ctx_, alloc, batch_size)); - break; - case VEC_UNIFORM_CONST: - OZ (backup_cols_[i].copy_uniform_base(exprs_->at(i), static_cast - (*exprs_->at(i)->get_vector(*eval_ctx_)), true, - *eval_ctx_, alloc, batch_size)); - break; - default: - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get wrong vector format", K(format), K(ret)); - break; - } + } else if (OB_FAIL(backup_cols_[i].save(alloc, batch_size, eval_ctx_))) { + LOG_WARN("failed to backup col", K(ret), K(i)); + } else if 
(exprs_->at(i)->is_nested_expr() && !is_uniform_format(exprs_->at(i)->get_format(*eval_ctx_)) + && OB_FAIL(backup_cols_[i].save_nested(alloc, batch_size, eval_ctx_))) { + LOG_WARN("failed to backup nested col", K(ret), K(i)); } } } @@ -366,39 +442,11 @@ int ObVectorsResultHolder::restore() const for (int64_t i = 0; OB_SUCC(ret) && i < exprs_->count(); ++i) { if (OB_FAIL(exprs_->at(i)->get_vector_header(*eval_ctx_).assign(backup_cols_[i].header_))) { LOG_WARN("failed to assign vector", K(ret)); - } else { - VectorFormat format = backup_cols_[i].header_.format_; - switch (format) { - case VEC_FIXED: - backup_cols_[i].restore_fixed_base(static_cast - (*exprs_->at(i)->get_vector(*eval_ctx_)), - saved_size_, *eval_ctx_); - break; - case VEC_DISCRETE: - backup_cols_[i].restore_discrete_base(static_cast - (*exprs_->at(i)->get_vector(*eval_ctx_)), - saved_size_, *eval_ctx_); - break; - case VEC_CONTINUOUS: - backup_cols_[i].restore_continuous_base(static_cast - (*exprs_->at(i)->get_vector(*eval_ctx_)), - saved_size_, *eval_ctx_); - break; - case VEC_UNIFORM: - backup_cols_[i].restore_uniform_base(exprs_->at(i), static_cast - (*exprs_->at(i)->get_vector(*eval_ctx_)), false, - *eval_ctx_, saved_size_); - break; - case VEC_UNIFORM_CONST: - backup_cols_[i].restore_uniform_base(exprs_->at(i), static_cast - (*exprs_->at(i)->get_vector(*eval_ctx_)), true, - *eval_ctx_, saved_size_); - break; - default: - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get wrong vector format", K(format), K(ret)); - break; - } + } else if (OB_FAIL(backup_cols_[i].restore(saved_size_, eval_ctx_))) { + LOG_WARN("failed to restore col", K(ret), K(saved_size_), K(i)); + } else if (exprs_->at(i)->is_nested_expr() && !is_uniform_format(exprs_->at(i)->get_format(*eval_ctx_)) + && OB_FAIL(backup_cols_[i].restore_nested(saved_size_, eval_ctx_))) { + LOG_WARN("failed to backup nested col", K(ret), K(i)); } } } diff --git a/src/sql/engine/basic/ob_vector_result_holder.h b/src/sql/engine/basic/ob_vector_result_holder.h 
index 367e159930..335e9902cc 100644 --- a/src/sql/engine/basic/ob_vector_result_holder.h +++ b/src/sql/engine/basic/ob_vector_result_holder.h @@ -63,7 +63,8 @@ private: offsets_(nullptr), continuous_data_(nullptr), expr_(expr), frame_nulls_(nullptr), frame_datums_(nullptr), frame_data_(nullptr), frame_lens_(nullptr), frame_ptrs_(nullptr), - frame_offsets_(nullptr), frame_continuous_data_(nullptr) {} + frame_offsets_(nullptr), frame_continuous_data_(nullptr), + expr_attrs_(nullptr), attrs_res_(nullptr), attrs_cnt_(0) {} void reset(common::ObIAllocator &alloc); int copy_vector_base(const ObVectorBase &vec); int copy_bitmap_null_base(const ObBitmapNullVectorBase &vec, @@ -95,6 +96,10 @@ private: void restore_uniform_base(const ObExpr *expr, ObUniformBase &vec, bool is_const, ObEvalCtx &eval_ctx, const int64_t batch_size) const; + int save_nested(ObIAllocator &alloc, const int64_t batch_size, ObEvalCtx *eval_ctx); + int save(ObIAllocator &alloc, const int64_t batch_size, ObEvalCtx *eval_ctx); + int restore_nested(const int64_t saved_size, ObEvalCtx *eval_ctx); + int restore(const int64_t saved_size, ObEvalCtx *eval_ctx); VectorHeader header_; int64_t max_row_cnt_; //ObVectorBase @@ -120,6 +125,9 @@ private: char **frame_ptrs_; //ObDiscreteBase uint32_t *frame_offsets_; //ObContinuousBase char *frame_continuous_data_; //ObContinuousBase + ObExpr **expr_attrs_; + ObColResultHolder *attrs_res_; + uint32_t attrs_cnt_; }; const common::ObIArray *exprs_; ObEvalCtx *eval_ctx_; diff --git a/src/sql/engine/dml/ob_conflict_checker.cpp b/src/sql/engine/dml/ob_conflict_checker.cpp index c92706affe..54912aea1e 100644 --- a/src/sql/engine/dml/ob_conflict_checker.cpp +++ b/src/sql/engine/dml/ob_conflict_checker.cpp @@ -57,6 +57,7 @@ OB_DEF_SERIALIZE(ObConflictCheckerCtdef) OB_UNIS_ENCODE(table_column_exprs_); OB_UNIS_ENCODE(use_dist_das_); OB_UNIS_ENCODE(rowkey_count_); + OB_UNIS_ENCODE(attach_spec_); } return ret; } @@ -86,6 +87,7 @@ OB_DEF_DESERIALIZE(ObConflictCheckerCtdef) 
OB_UNIS_DECODE(table_column_exprs_); OB_UNIS_DECODE(use_dist_das_); OB_UNIS_DECODE(rowkey_count_); + OB_UNIS_DECODE(attach_spec_); } return ret; } @@ -109,6 +111,7 @@ OB_DEF_SERIALIZE_SIZE(ObConflictCheckerCtdef) OB_UNIS_ADD_LEN(table_column_exprs_); OB_UNIS_ADD_LEN(use_dist_das_); OB_UNIS_ADD_LEN(rowkey_count_); + OB_UNIS_ADD_LEN(attach_spec_); return len; } @@ -170,6 +173,7 @@ ObConflictChecker::ObConflictChecker(common::ObIAllocator &allocator, : eval_ctx_(eval_ctx), checker_ctdef_(checker_ctdef), das_scan_rtdef_(), + attach_rtinfo_(nullptr), allocator_(allocator), das_ref_(eval_ctx, eval_ctx.exec_ctx_), local_tablet_loc_(nullptr), @@ -765,6 +769,16 @@ int ObConflictChecker::get_das_scan_op(ObDASTabletLoc *tablet_loc, ObDASScanOp * das_scan_op->set_scan_rtdef(&das_scan_rtdef_); table_loc_->is_reading_ = true; //mark the table location with reading action } + if (OB_SUCC(ret) && OB_NOT_NULL(attach_rtinfo_)) { + if (OB_FAIL(das_scan_op->reserve_related_buffer(attach_rtinfo_->related_scan_cnt_))) { + LOG_WARN("fail to reserve related buffer", K(ret), K(attach_rtinfo_->related_scan_cnt_)); + } else if (OB_FAIL(attach_related_taskinfo(*das_scan_op, attach_rtinfo_->attach_rtdef_))) { + LOG_WARN("fail to attach related task info", K(ret)); + } else { + das_scan_op->set_attach_ctdef(checker_ctdef_.attach_spec_.attach_ctdef_); + das_scan_op->set_attach_rtdef(attach_rtinfo_->attach_rtdef_); + } + } } return ret; } @@ -962,8 +976,129 @@ int ObConflictChecker::init_das_scan_rtdef() int64_t schema_version = task_exec_ctx.get_query_tenant_begin_schema_version(); das_scan_rtdef_.tenant_schema_version_ = schema_version; das_scan_rtdef_.eval_ctx_ = &eval_ctx_; + das_scan_rtdef_.ctdef_ = &checker_ctdef_.das_scan_ctdef_; + das_scan_rtdef_.table_loc_ = table_loc_; if (OB_FAIL(das_scan_rtdef_.init_pd_op(eval_ctx_.exec_ctx_, checker_ctdef_.das_scan_ctdef_))) { LOG_WARN("init pushdown storage filter failed", K(ret)); + } else if (nullptr != checker_ctdef_.attach_spec_.attach_ctdef_) { 
+ if (OB_ISNULL(attach_rtinfo_ = OB_NEWx(ObDASAttachRtInfo, &allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate das attach info", K(ret)); + } else if (OB_FAIL(init_attach_scan_rtdef(checker_ctdef_.attach_spec_.attach_ctdef_, attach_rtinfo_->attach_rtdef_))) { + LOG_WARN("fail to init attach scan rtdef", K(ret), KPC(checker_ctdef_.attach_spec_.attach_ctdef_)); + } + } + return ret; +} + +int ObConflictChecker::init_attach_scan_rtdef(const ObDASBaseCtDef *attach_ctdef, + ObDASBaseRtDef *&attach_rtdef) +{ + int ret = OB_SUCCESS; + ObExecContext &ctx = eval_ctx_.exec_ctx_; + ObDASTaskFactory &das_factory = DAS_CTX(ctx).get_das_factory(); + if (OB_ISNULL(attach_ctdef)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("attach ctdef is nullptr", K(ret)); + } else if (OB_FAIL(das_factory.create_das_rtdef(attach_ctdef->op_type_, attach_rtdef))) { + LOG_WARN("create das rtdef failed", K(ret), K(attach_ctdef->op_type_)); + } else if (ObDASTaskFactory::is_attached(attach_ctdef->op_type_)) { + attach_rtdef->ctdef_ = attach_ctdef; + attach_rtdef->children_cnt_ = attach_ctdef->children_cnt_; + attach_rtdef->eval_ctx_ = &eval_ctx_; + if (attach_ctdef->children_cnt_ > 0) { + if (OB_ISNULL(attach_rtdef->children_ = OB_NEW_ARRAY(ObDASBaseRtDef*, + &ctx.get_allocator(), + attach_ctdef->children_cnt_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate child buf failed", K(ret), K(attach_ctdef->children_cnt_)); + } + for (int i = 0; OB_SUCC(ret) && i < attach_ctdef->children_cnt_; ++i) { + if (OB_FAIL(init_attach_scan_rtdef(attach_ctdef->children_[i], attach_rtdef->children_[i]))) { + LOG_WARN("init attach scan rtdef failed", K(ret)); + } + } + } + } else { + attach_rtinfo_->related_scan_cnt_++; + if (attach_ctdef == &checker_ctdef_.das_scan_ctdef_) { + attach_rtdef = &das_scan_rtdef_; + } else if (attach_ctdef->op_type_ != DAS_OP_TABLE_SCAN) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("attach ctdef type is invalid", K(ret), K(attach_ctdef->op_type_)); + } else 
{ + ObPhysicalPlanCtx *plan_ctx = eval_ctx_.exec_ctx_.get_physical_plan_ctx(); + ObSQLSessionInfo *my_session = eval_ctx_.exec_ctx_.get_my_session(); + ObTaskExecutorCtx &task_exec_ctx = eval_ctx_.exec_ctx_.get_task_exec_ctx(); + const ObDASScanCtDef *attach_scan_ctdef = static_cast(attach_ctdef); + const ObDASTableLocMeta *attach_loc_meta = checker_ctdef_.attach_spec_.get_attach_loc_meta( + table_loc_->get_table_location_key(), attach_scan_ctdef->ref_table_id_); + ObDASScanRtDef *attach_scan_rtdef = static_cast(attach_rtdef); + attach_scan_rtdef->timeout_ts_ = plan_ctx->get_ps_timeout_timestamp(); + attach_scan_rtdef->sql_mode_ = my_session->get_sql_mode(); + attach_scan_rtdef->stmt_allocator_.set_alloc(&das_ref_.get_das_alloc()); + attach_scan_rtdef->scan_allocator_.set_alloc(&das_ref_.get_das_alloc()); + ObQueryFlag query_flag(ObQueryFlag::Forward/*scan_order*/, false/*daily_merge*/, false/*optimize*/, + false/*sys scan*/, false/*full_row*/, false/*index_back*/, false/*query_stat*/, + ObQueryFlag::MysqlMode/*sql_mode*/, true/*read_latest*/); + attach_scan_rtdef->scan_flag_.flag_ = query_flag.flag_; + attach_scan_rtdef->tenant_schema_version_ = task_exec_ctx.get_query_tenant_begin_schema_version(); + attach_scan_rtdef->eval_ctx_ = &eval_ctx_; + attach_scan_rtdef->ctdef_ = attach_ctdef; + attach_scan_rtdef->table_loc_ = DAS_CTX(ctx).get_table_loc_by_id(table_loc_->get_table_location_key(), + attach_scan_ctdef->ref_table_id_); + if (OB_FAIL(attach_scan_rtdef->init_pd_op(eval_ctx_.exec_ctx_, *attach_scan_ctdef))) { + LOG_WARN("init pushdown storage filter failed", K(ret)); + } else if (OB_ISNULL(attach_scan_rtdef->table_loc_)) { + if (OB_ISNULL(attach_loc_meta)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get table loc by id failed", K(ret), K(table_loc_->get_table_location_key()), + K(attach_scan_ctdef->ref_table_id_),K(DAS_CTX(ctx).get_table_loc_list())); + } else if (OB_FAIL(DAS_CTX(ctx).extended_table_loc(*attach_loc_meta, attach_scan_rtdef->table_loc_))) { + 
LOG_WARN("extended table location failed", K(ret), KPC(attach_loc_meta)); + } + } + if (OB_SUCC(ret) && OB_NOT_NULL(attach_scan_rtdef->table_loc_) + && OB_NOT_NULL(attach_scan_rtdef->table_loc_->loc_meta_)) { + if (attach_scan_rtdef->table_loc_->loc_meta_->select_leader_ == 0) { + attach_scan_rtdef->scan_flag_.set_is_select_follower(); + } + } + } + } + return ret; +} + +int ObConflictChecker::attach_related_taskinfo(ObDASScanOp &target_op, ObDASBaseRtDef *attach_rtdef) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(attach_rtdef) || OB_ISNULL(attach_rtdef->ctdef_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("attach rtdef is invalid", K(ret), KP(attach_rtdef), KP(attach_rtdef->ctdef_)); + } else if (attach_rtdef->op_type_ == DAS_OP_TABLE_SCAN) { + const ObDASScanCtDef *scan_ctdef = static_cast(attach_rtdef->ctdef_); + ObDASScanRtDef *scan_rtdef = static_cast(attach_rtdef); + ObDASTableLoc *table_loc = scan_rtdef->table_loc_; + ObDASTabletLoc *tablet_loc = ObDASUtils::get_related_tablet_loc( + *target_op.get_tablet_loc(), table_loc->loc_meta_->ref_table_id_); + if (OB_ISNULL(tablet_loc)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("related tablet loc is not found", K(ret), + KPC(target_op.get_tablet_loc()), + KPC(table_loc->loc_meta_)); + } else if (OB_FAIL(target_op.set_related_task_info(scan_ctdef, + scan_rtdef, + tablet_loc->tablet_id_))) { + LOG_WARN("set attach task info failed", K(ret), KPC(tablet_loc)); + } else { + table_loc->is_reading_ = true; + } + } else { + for (int i = 0; OB_SUCC(ret) && i < attach_rtdef->children_cnt_; ++i) { + if (OB_FAIL(attach_related_taskinfo(target_op, attach_rtdef->children_[i]))) { + LOG_WARN("recursive attach related task info failed", K(ret), K(i)); + } + } } return ret; } diff --git a/src/sql/engine/dml/ob_conflict_checker.h b/src/sql/engine/dml/ob_conflict_checker.h index 8ad854954c..1a739a409b 100644 --- a/src/sql/engine/dml/ob_conflict_checker.h +++ b/src/sql/engine/dml/ob_conflict_checker.h @@ -14,6 +14,7 @@ #define 
OBDEV_SRC_SQL_ENGINE_DML_OB_CONFLICT_ROW_CHECKER_H_ #include "sql/engine/basic/ob_chunk_datum_store.h" #include "sql/das/ob_das_scan_op.h" +#include "sql/das/ob_das_attach_define.h" #include "sql/engine/dml/ob_dml_ctx_define.h" namespace oceanbase @@ -110,10 +111,11 @@ public: table_column_exprs_(alloc), use_dist_das_(false), rowkey_count_(0), + attach_spec_(alloc, &das_scan_ctdef_), alloc_(alloc) {} virtual ~ObConflictCheckerCtdef() = default; - TO_STRING_KV(K_(cst_ctdefs), K_(das_scan_ctdef), KPC_(calc_part_id_expr)); + TO_STRING_KV(K_(cst_ctdefs), K_(das_scan_ctdef), KPC_(calc_part_id_expr), K_(attach_spec)); // must constraint_infos_.count() == conflict_map_array_.count() // constraint_infos_ 用于生成ObConflictRowMap的key ObRowkeyCstCtdefArray cst_ctdefs_; @@ -129,6 +131,7 @@ public: ExprFixedArray table_column_exprs_; bool use_dist_das_; int64_t rowkey_count_; + ObDASAttachSpec attach_spec_; common::ObIAllocator &alloc_; private: DISALLOW_COPY_AND_ASSIGN(ObConflictCheckerCtdef); @@ -218,6 +221,8 @@ private: int build_tmp_rowkey(ObRowkey *rowkey, ObRowkeyCstCtdef *rowkey_info); int init_das_scan_rtdef(); + int init_attach_scan_rtdef(const ObDASBaseCtDef *attach_ctdef, ObDASBaseRtDef *&attach_rtdef); + int attach_related_taskinfo(ObDASScanOp &target_op, ObDASBaseRtDef *attach_rtdef); int get_tmp_string_buffer(common::ObIAllocator *&allocator); public: @@ -225,6 +230,7 @@ public: ObEvalCtx &eval_ctx_; // 用于表达式的计算 const ObConflictCheckerCtdef &checker_ctdef_; ObDASScanRtDef das_scan_rtdef_; + ObDASAttachRtInfo *attach_rtinfo_; // allocator用来创建hash map, 是ObExecContext内部的allocator 这个不能被reuse common::ObIAllocator &allocator_; // das_scan回表用 diff --git a/src/sql/engine/dml/ob_dml_service.cpp b/src/sql/engine/dml/ob_dml_service.cpp index 466c970c0d..f241fc4d4e 100644 --- a/src/sql/engine/dml/ob_dml_service.cpp +++ b/src/sql/engine/dml/ob_dml_service.cpp @@ -100,6 +100,10 @@ int ObDMLService::check_row_null(const ObExprPtrIArray &row, ret = OB_BAD_NULL_ERROR; LOG_WARN("dml 
with ignore not supported in roaringbitmap type"); LOG_USER_ERROR(OB_BAD_NULL_ERROR, column_infos.at(i).column_name_.length(), column_infos.at(i).column_name_.ptr()); + } else if (ob_is_collection_sql_type(row.at(col_idx)->obj_meta_.get_type())) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("dml with ignore not supported in collection type"); + LOG_USER_ERROR(OB_BAD_NULL_ERROR, column_infos.at(i).column_name_.length(), column_infos.at(i).column_name_.ptr()); } else if (check_cascaded_reference(row.at(col_idx), row)) { //This column is dependent on other columns and cannot be modified again; //otherwise, it will necessitate a cascading recalculation of the dependent expression results. diff --git a/src/sql/engine/expr/ob_array_cast.cpp b/src/sql/engine/expr/ob_array_cast.cpp new file mode 100644 index 0000000000..8dbdc5867c --- /dev/null +++ b/src/sql/engine/expr/ob_array_cast.cpp @@ -0,0 +1,556 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ +#define USING_LOG_PREFIX SQL_ENG + +#include "ob_array_cast.h" +#include "lib/json_type/ob_json_tree.h" +#include "lib/json_type/ob_json_parse.h" +#include "share/object/ob_obj_cast.h" + +namespace oceanbase { +namespace sql { + +int ObVectorDataCast::cast(common::ObIAllocator &alloc, ObIArrayType *src, const ObCollectionTypeBase *elem_type, + ObIArrayType *&dst, const ObCollectionTypeBase *dst_elem_type) +{ + int ret = OB_SUCCESS; + const ObCollectionBasicType *src_type = dynamic_cast(elem_type); + const ObCollectionBasicType *dst_type = dynamic_cast(dst_elem_type); + if (OB_UNLIKELY(!src_type || !dst_type)) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("unexpected status: invalid argument", K(ret), KP(src_type), KP(dst_type)); + } else if (dim_cnt_ != src->size()) { + ret = OB_ERR_INVALID_VECTOR_DIM; + LOG_WARN("invalid array size", K(ret), K(dim_cnt_), K(src->size())); + } + for (int64_t i = 0; i < src->size() && OB_SUCC(ret); i++) { + ObObj src_elem; + if (src->get_format() != ArrayFormat::Vector && src->is_null(i)) { + if (OB_FAIL(dst->push_null())) { + LOG_WARN("failed to add null to array", K(ret), K(i)); + } + } else if (OB_FAIL(ObArrayCastUtils::cast_get_element(src, src_type, i, src_elem))) { + LOG_WARN("failed to get cast element", K(ret), K(i)); + } else { + ObObjType dst_obj_type = dst_type->basic_meta_.get_obj_type(); + ObObj res; + ObCastCtx cast_ctx(&alloc, NULL, CM_NONE, ObCharset::get_system_collation()); + if (OB_FAIL(ObObjCaster::to_type(dst_obj_type, cast_ctx, src_elem, res))) { + LOG_WARN("failed to cast number to double type", K(ret)); + } else { + ObVectorData *dst_arr = static_cast(dst); + if (OB_FAIL(dst_arr->push_back(res.get_float()))) { + LOG_WARN("failed to push back array value", K(ret)); + } + } + } + } + return ret; +} + +int ObArrayFixedSizeCast::cast(common::ObIAllocator &alloc, ObIArrayType *src, const ObCollectionTypeBase *elem_type, + ObIArrayType *&dst, const ObCollectionTypeBase *dst_elem_type) +{ + int ret = 
OB_SUCCESS; + const ObCollectionBasicType *src_type = dynamic_cast(elem_type); + const ObCollectionBasicType *dst_type = dynamic_cast(dst_elem_type); + if (OB_UNLIKELY(!src_type || !dst_type)) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("unexpected status: invalid argument", K(ret), KP(src_type), KP(dst_type)); + } + for (int64_t i = 0; i < src->size() && OB_SUCC(ret); i++) { + ObObj src_elem; + if (src->get_format() != ArrayFormat::Vector && src->is_null(i)) { + if (OB_FAIL(dst->push_null())) { + LOG_WARN("failed to add null to array", K(ret), K(i)); + } + } else if (OB_FAIL(ObArrayCastUtils::cast_get_element(src, src_type, i, src_elem))) { + LOG_WARN("failed to get cast element", K(ret), K(i)); + } else if (OB_FAIL(ObArrayCastUtils::cast_add_element(alloc, src_elem, dst, dst_type))) { + LOG_WARN("failed to cast and add element", K(ret)); + } + } + return ret; +} + +int ObArrayCastUtils::cast_get_element(ObIArrayType *src, const ObCollectionBasicType *elem_type, uint32_t idx, ObObj &src_elem) +{ + int ret = OB_SUCCESS; + ObObjType obj_type = elem_type->basic_meta_.get_obj_type(); + switch (obj_type) { + case ObTinyIntType: { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_tinyint((*arr)[idx]); + break; + } + case ObSmallIntType: { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_smallint((*arr)[idx]); + break; + } + case ObIntType: { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_int((*arr)[idx]); + break; + } + case ObInt32Type: { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_int32((*arr)[idx]); + break; + } + case ObUTinyIntType: { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_utinyint((*arr)[idx]); + break; + } + case ObUSmallIntType: { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_usmallint((*arr)[idx]); + break; + } + case ObUInt64Type: { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_uint64((*arr)[idx]); + break; + } + case ObUInt32Type: { + 
ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_uint32((*arr)[idx]); + break; + } + case ObDecimalIntType: { + ObPrecision prec = elem_type->basic_meta_.get_precision(); + if (get_decimalint_type(prec) == DECIMAL_INT_32) { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_decimal_int(sizeof(int32_t), arr->get_scale(), arr->get_decimal_int(idx)); + } else if (get_decimalint_type(prec) == DECIMAL_INT_64) { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_decimal_int(sizeof(int64_t), arr->get_scale(), arr->get_decimal_int(idx)); + } else if (get_decimalint_type(prec) == DECIMAL_INT_128) { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_decimal_int(sizeof(int128_t), arr->get_scale(), arr->get_decimal_int(idx)); + } else if (get_decimalint_type(prec) == DECIMAL_INT_256) { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_decimal_int(sizeof(int256_t), arr->get_scale(), arr->get_decimal_int(idx)); + } else if (get_decimalint_type(prec) == DECIMAL_INT_512) { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_decimal_int(sizeof(int512_t), arr->get_scale(), arr->get_decimal_int(idx)); + } else { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected precision", K(ret), K(prec)); + } + break; + } + case ObVarcharType : { + ObArrayBinary *arr = static_cast(src); + src_elem.set_varchar((*arr)[idx]); + break; + } + case ObDoubleType: { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_double((*arr)[idx]); + break; + } + case ObFloatType: { + ObArrayFixedSize *arr = static_cast *>(src); + src_elem.set_float((*arr)[idx]); + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected element type", K(ret), K(elem_type->basic_meta_.get_obj_type())); + } + } + return ret; +} + +int ObArrayCastUtils::cast_add_element(common::ObIAllocator &alloc, ObObj &src_elem, + ObIArrayType *dst, const ObCollectionBasicType *dst_elem_type) +{ + int ret = OB_SUCCESS; + ObCastCtx cast_ctx(&alloc, 
NULL, CM_NONE, ObCharset::get_system_collation()); + ObObjType dst_obj_type = dst_elem_type->basic_meta_.get_obj_type(); + ObObj res; + if (OB_FAIL(ObObjCaster::to_type(dst_obj_type, cast_ctx, src_elem, res))) { + LOG_WARN("failed to cast number to double type", K(ret)); + } else { + switch (dst_obj_type) { + case ObTinyIntType : { + ObArrayFixedSize *dst_arr = static_cast *>(dst); + if (OB_FAIL(dst_arr->push_back(res.get_tinyint()))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + case ObSmallIntType : { + ObArrayFixedSize *dst_arr = static_cast *>(dst); + if (OB_FAIL(dst_arr->push_back(res.get_smallint()))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + case ObIntType : { + ObArrayFixedSize *dst_arr = static_cast *>(dst); + if (OB_FAIL(dst_arr->push_back(res.get_int()))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + case ObInt32Type: { + ObArrayFixedSize *dst_arr = static_cast *>(dst); + if (OB_FAIL(dst_arr->push_back(res.get_int32()))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + case ObUTinyIntType : { + ObArrayFixedSize *dst_arr = static_cast *>(dst); + if (OB_FAIL(dst_arr->push_back(res.get_utinyint()))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + case ObUSmallIntType : { + ObArrayFixedSize *dst_arr = static_cast *>(dst); + if (OB_FAIL(dst_arr->push_back(res.get_usmallint()))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + case ObUInt64Type : { + ObArrayFixedSize *dst_arr = static_cast *>(dst); + if (OB_FAIL(dst_arr->push_back(res.get_uint64()))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + case ObUInt32Type: { + ObArrayFixedSize *dst_arr = static_cast *>(dst); + if (OB_FAIL(dst_arr->push_back(res.get_uint32()))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + case ObDecimalIntType: { + ret = OB_NOT_SUPPORTED; + // to do + 
break; + } + case ObFloatType: { + ObArrayFixedSize *dst_arr = static_cast *>(dst); + if (OB_FAIL(dst_arr->push_back(res.get_float()))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + case ObDoubleType: { + ObArrayFixedSize *dst_arr = static_cast *>(dst); + if (OB_FAIL(dst_arr->push_back(res.get_double()))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + case ObVarcharType: { + ObArrayBinary *dst_arr = static_cast(dst); + if (OB_FAIL(dst_arr->push_back(res.get_varchar()))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected element type", K(ret), K(dst_obj_type)); + } + } + } + return ret; +} + +#define ADD_SIGNED_FIXED_ARRAY_OBJ(Element_Type) \ + int64_t val; \ + ObArrayFixedSize *dst_arr = static_cast *>(dst); \ + if (OB_FAIL(j_node.to_int(val))) { \ + LOG_WARN("failed to push back array value", K(ret)); \ + } else if (OB_FAIL(int_range_check(dst_obj_type, val, val))) { \ + LOG_WARN("failed to check value range", K(ret), K(val)); \ + } else if (OB_FAIL(dst_arr->push_back(static_cast(val)))) { \ + LOG_WARN("failed to push back array value", K(ret)); \ + } + +#define ADD_UNSIGNED_FIXED_ARRAY_OBJ(Element_Type) \ + uint64_t val; \ + ObArrayFixedSize *dst_arr = static_cast *>(dst); \ + if (OB_FAIL(j_node.to_uint(val))) { \ + LOG_WARN("failed to push back array value", K(ret)); \ + } else if (OB_FAIL(uint_range_check(dst_obj_type, val, val))) { \ + LOG_WARN("failed to check value range", K(ret), K(val)); \ + } else if (OB_FAIL(dst_arr->push_back(static_cast(val)))) { \ + LOG_WARN("failed to push back array value", K(ret)); \ + } + +int ObArrayCastUtils::add_json_node_to_array(common::ObIAllocator &alloc, ObJsonNode &j_node, const ObCollectionTypeBase *elem_type, ObIArrayType *dst) +{ + int ret = OB_SUCCESS; + if (j_node.json_type() == ObJsonNodeType::J_NULL) { + if (OB_FAIL(dst->push_null())) { + LOG_WARN("failed to push null 
array value", K(ret)); + } + } else if (j_node.json_type() == ObJsonNodeType::J_ARRAY) { + const ObCollectionArrayType *array_type = dynamic_cast(elem_type); + ObIArrayType *child_array = nullptr; + ObJsonArray *json_arr = static_cast(&j_node); + if (OB_ISNULL(array_type)) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("unexpected element type", K(ret), K(elem_type->type_id_)); + } else if (OB_FAIL(ObArrayTypeObjFactory::construct(alloc, *array_type, child_array))) { + LOG_WARN("failed to add null to array", K(ret)); + } + for (int i = 0; i < json_arr->element_count() && OB_SUCC(ret); i++) { + if (OB_FAIL(add_json_node_to_array(alloc, *(*json_arr)[i], array_type->element_type_, child_array))) { + LOG_WARN("failed to add json node to array", K(ret), K(i)); + } + } + if (OB_SUCC(ret)) { + ObArrayNested *nested_arr = static_cast(dst); + if (OB_FAIL(child_array->init())) { + LOG_WARN("child array init failed", K(ret)); + } else if (OB_FAIL(nested_arr->push_back(*child_array))) { + LOG_WARN("failed to push back array value", K(ret)); + } + } + } else { + // basic type + const ObCollectionBasicType *basic_type = dynamic_cast(elem_type); + if (OB_ISNULL(basic_type)) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("unexpected element type", K(ret), K(elem_type->type_id_)); + } else { + ObObjType dst_obj_type = basic_type->basic_meta_.get_obj_type(); + switch (dst_obj_type) { + case ObTinyIntType: { + ADD_SIGNED_FIXED_ARRAY_OBJ(int8_t); + break; + } + case ObSmallIntType: { + ADD_SIGNED_FIXED_ARRAY_OBJ(int16_t); + break; + } + case ObInt32Type: { + ADD_SIGNED_FIXED_ARRAY_OBJ(int32_t); + break; + } + case ObIntType: { + ADD_SIGNED_FIXED_ARRAY_OBJ(int64_t); + break; + } + case ObUTinyIntType: { + ADD_UNSIGNED_FIXED_ARRAY_OBJ(uint8_t); + break; + } + case ObUSmallIntType: { + ADD_UNSIGNED_FIXED_ARRAY_OBJ(uint16_t); + break; + } + case ObUInt32Type: { + ADD_UNSIGNED_FIXED_ARRAY_OBJ(uint32_t); + break; + } + case ObUInt64Type: { + ADD_SIGNED_FIXED_ARRAY_OBJ(int64_t); + break; 
+ } + case ObDecimalIntType: { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported", K(ret)); + break; + } + case ObFloatType: { + double val; + ObArrayFixedSize *dst_arr = static_cast *>(dst); + if (OB_FAIL(j_node.to_double(val))) { + LOG_WARN("failed to push back array value", K(ret)); + } else if (OB_FAIL(real_range_check(dst_obj_type, val, val))) { + LOG_WARN("failed to check value range", K(ret)); + } else if (OB_FAIL(dst_arr->push_back(static_cast(val)))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + case ObDoubleType: { + double val; + ObArrayFixedSize *dst_arr = static_cast *>(dst); + if (OB_FAIL(j_node.to_double(val))) { + LOG_WARN("failed to push back array value", K(ret)); + } else if (OB_FAIL(real_range_check(dst_obj_type, val, val))) { + LOG_WARN("failed to check value range", K(ret)); + } else if (OB_FAIL(dst_arr->push_back(val))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + case ObVarcharType: { + ObArrayBinary *dst_arr = static_cast(dst); + ObStringBuffer str_buf(&alloc); + if (OB_FAIL(j_node.print(str_buf, false))) { + LOG_WARN("failed to push back array value", K(ret)); + } else if (OB_FAIL(dst_arr->push_back(str_buf.string()))) { + LOG_WARN("failed to push back array value", K(ret)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + OB_LOG(WARN, "unexpected element type", K(ret), K(dst_obj_type)); + } + } + } + } + return ret; +} + +int ObArrayCastUtils::string_cast(common::ObIAllocator &alloc, ObString &arr_text, + ObIArrayType *&dst, const ObCollectionTypeBase *dst_elem_type) +{ + int ret = OB_SUCCESS; + const char *syntaxerr = NULL; + uint64_t err_offset = 0; + ObJsonNode *j_node = NULL; + if (OB_FAIL( + ObJsonParser::parse_json_text(&alloc, arr_text.ptr(), arr_text.length(), syntaxerr, &err_offset, j_node))) { + LOG_WARN("failed to parse array text", K(ret), K(arr_text), KCSTRING(syntaxerr), K(err_offset)); + } else if (j_node->json_type() != ObJsonNodeType::J_ARRAY) { + 
ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid text. not json type", K(ret), K(arr_text), K(j_node->json_type())); + } else { + for (int i = 0; i < j_node->element_count() && OB_SUCC(ret); i++) { + ObJsonArray *json_arr = static_cast(j_node); + if (OB_FAIL(add_json_node_to_array(alloc, *(*json_arr)[i], dst_elem_type, dst))) { + LOG_WARN("failed to add json node to array", K(ret), K(i)); + } + } + } + + return ret; +} + +int ObArrayBinaryCast::cast(common::ObIAllocator &alloc, ObIArrayType *src, const ObCollectionTypeBase *elem_type, + ObIArrayType *&dst, const ObCollectionTypeBase *dst_elem_type) +{ + int ret = OB_SUCCESS; + const ObCollectionBasicType *src_type = dynamic_cast(elem_type); + const ObCollectionBasicType *dst_type = dynamic_cast(dst_elem_type); + if (OB_UNLIKELY(!src_type || !dst_type)) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("unexpected status: invalid argument", K(ret), KP(src_type), KP(dst_type)); + } else { + ObLength elem_len_max = dst_type->basic_meta_.get_length(); + ObCollationType elem_cs_type = src_type->basic_meta_.get_collation_type(); + ObCollationLevel elem_ncl_type = src_type->basic_meta_.get_collation_level(); + for (int64_t i = 0; i < src->size() && OB_SUCC(ret); i++) { + ObObj src_elem; + if (src->get_format() != ArrayFormat::Vector && src->is_null(i)) { + if (OB_FAIL(dst->push_null())) { + LOG_WARN("failed to add null to array", K(ret), K(i)); + } + } else if (OB_FAIL(ObArrayCastUtils::cast_get_element(src, src_type, i, src_elem))) { + LOG_WARN("failed to get cast element", K(ret), K(i)); + } else if (FALSE_IT(src_elem.set_collation_type(elem_cs_type))) { + } else if (FALSE_IT(src_elem.set_collation_level(elem_ncl_type))) { + }else if (elem_len_max < src_elem.get_string_len()) { + ret = OB_ERR_DATA_TOO_LONG; + LOG_WARN("varchar type length is too long", K(ret), K(i), K(elem_len_max), K(src_elem.get_string_len())); + } else if (OB_FAIL(ObArrayCastUtils::cast_add_element(alloc, src_elem, dst, dst_type))) { + LOG_WARN("failed 
to cast and add element", K(ret)); + } + } + } + return ret; +} + +int ObArrayNestedCast::cast(common::ObIAllocator &alloc, ObIArrayType *src, const ObCollectionTypeBase *elem_type, + ObIArrayType *&dst, const ObCollectionTypeBase *dst_elem_type) +{ + int ret = OB_SUCCESS; + const ObCollectionArrayType *src_type = dynamic_cast(elem_type); + const ObCollectionArrayType *dst_type = dynamic_cast(dst_elem_type); + ObArrayNested *dst_arr = dynamic_cast(dst); + if (OB_UNLIKELY(!src_type || !dst_type || !dst_arr)) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("unexpected status: invalid argument", K(ret), KP(src_type), KP(dst_type), KP(dst_arr)); + } else { + ObIArrayType *src_elem = nullptr; + ObIArrayType *dst_elem = nullptr; + ObArrayTypeCast *arr_cast = nullptr; + if (OB_FAIL(ObArrayTypeObjFactory::construct(alloc, *src_type, src_elem))) { + LOG_WARN("failed to add null to array", K(ret)); + } else if (OB_FAIL(ObArrayTypeObjFactory::construct(alloc, *dst_type, dst_elem))) { + LOG_WARN("failed to add null to array", K(ret)); + } else if (OB_FAIL(ObArrayTypeCastFactory::alloc(alloc, *src_type, *dst_type, arr_cast))) { + LOG_WARN("alloc array cast failed", K(ret)); + } + for (int64_t i = 0; i < src->size() && OB_SUCC(ret); i++) { + if (src->get_format() != ArrayFormat::Vector && src->is_null(i)) { + if (OB_FAIL(dst->push_null())) { + LOG_WARN("failed to add null to array", K(ret), K(i)); + } + } else if (OB_FAIL(src->at(i, *src_elem))) { + LOG_WARN("failed to get elem", K(ret), K(i)); + } else if (OB_FAIL(arr_cast->cast(alloc, src_elem, src_type->element_type_, dst_elem, dst_type->element_type_))) { + LOG_WARN("array element cast failed", K(ret)); + } else if (OB_FAIL(dst_elem->init())) { + LOG_WARN("array init failed", K(ret)); + } else if (OB_FAIL(dst_arr->push_back(*dst_elem))) { + LOG_WARN("array push back failed", K(ret)); + } else { + src_elem->clear(); + dst_elem->clear(); + } + } + } + + return ret; +} + +int ObArrayTypeCastFactory::alloc(ObIAllocator &alloc, 
const ObCollectionTypeBase &src_array_meta, + const ObCollectionTypeBase &dst_array_meta, ObArrayTypeCast *&arr_cast) +{ + int ret = OB_SUCCESS; + UNUSED(src_array_meta); + const ObCollectionArrayType *arr_type = dynamic_cast(&dst_array_meta); + if (arr_type->element_type_->type_id_ == ObNestedType::OB_BASIC_TYPE) { + ObCollectionBasicType *elem_type = static_cast(arr_type->element_type_); + if (ob_is_string_tc(elem_type->basic_meta_.get_obj_type()) + && ObCharType != elem_type->basic_meta_.get_obj_type()) { + arr_cast = OB_NEWx(ObArrayBinaryCast, &alloc); + } else if (arr_type->type_id_ == ObNestedType::OB_VECTOR_TYPE) { + arr_cast = OB_NEWx(ObVectorDataCast, &alloc); + static_cast(arr_cast)->dim_cnt_ = arr_type->dim_cnt_; + } else { + arr_cast = OB_NEWx(ObArrayFixedSizeCast, &alloc); + } + } else if (arr_type->element_type_->type_id_ == ObNestedType::OB_ARRAY_TYPE) { + arr_cast = OB_NEWx(ObArrayNestedCast, &alloc); + } else { + // to do + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported cast type", K(ret), K(arr_type->element_type_->type_id_)); + } + if (OB_SUCC(ret) && OB_ISNULL(arr_cast)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + OB_LOG(WARN, "alloc memory failed", K(ret)); + } + + return ret; +} + +} // namespace sql +} // namespace oceanbase \ No newline at end of file diff --git a/src/sql/engine/expr/ob_array_cast.h b/src/sql/engine/expr/ob_array_cast.h new file mode 100644 index 0000000000..07c09d6e53 --- /dev/null +++ b/src/sql/engine/expr/ob_array_cast.h @@ -0,0 +1,91 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_OB_ARRAY_CAST_ +#define OCEANBASE_OB_ARRAY_CAST_ +#include "lib/udt/ob_collection_type.h" +#include "lib/udt/ob_array_type.h" + +namespace oceanbase { +namespace sql { + +enum ARRAY_CAST_TYPE { + FIXED_SIZE_FIXED_SIZE = 0, + CAST_TYPE_MAX, +}; + +class ObArrayTypeCast +{ +public: + ObArrayTypeCast() {}; + virtual ~ObArrayTypeCast() {}; + virtual int cast(common::ObIAllocator &alloc, ObIArrayType *src, const ObCollectionTypeBase *elem_type, + ObIArrayType *&dst, const ObCollectionTypeBase *dst_elem_type) = 0; +private: + DISALLOW_COPY_AND_ASSIGN(ObArrayTypeCast); +}; + +class ObArrayFixedSizeCast : public ObArrayTypeCast +{ +public: + int cast(common::ObIAllocator &alloc, ObIArrayType *src, const ObCollectionTypeBase *elem_type, + ObIArrayType *&dst, const ObCollectionTypeBase *dst_elem_type); +}; + +class ObVectorDataCast : public ObArrayTypeCast +{ +public: + int cast(common::ObIAllocator &alloc, ObIArrayType *src, const ObCollectionTypeBase *elem_type, + ObIArrayType *&dst, const ObCollectionTypeBase *dst_elem_type); + uint32_t dim_cnt_; +}; + +class ObArrayBinaryCast : public ObArrayTypeCast +{ +public: + int cast(common::ObIAllocator &alloc, ObIArrayType *src, const ObCollectionTypeBase *elem_type, + ObIArrayType *&dst, const ObCollectionTypeBase *dst_elem_type); +}; + +class ObArrayNestedCast : public ObArrayTypeCast +{ +public : +int cast(common::ObIAllocator &alloc, ObIArrayType *src, const ObCollectionTypeBase *elem_type, + ObIArrayType *&dst, const ObCollectionTypeBase *dst_elem_type); +} +; + + + +class ObArrayCastUtils +{ +public: + static int string_cast(common::ObIAllocator &alloc, ObString &arr_text, ObIArrayType *&dst, const ObCollectionTypeBase *dst_elem_type); + static int cast_get_element(ObIArrayType *src, const ObCollectionBasicType *elem_type, uint32_t idx, ObObj &src_elem); + static int cast_add_element(common::ObIAllocator &alloc, ObObj &src_elem, ObIArrayType *dst, const 
ObCollectionBasicType *dst_elem_type); + static int add_json_node_to_array(common::ObIAllocator &alloc, ObJsonNode &j_node, const ObCollectionTypeBase *elem_type, ObIArrayType *dst); +}; + +class ObArrayTypeCastFactory +{ +public: + ObArrayTypeCastFactory() {}; + virtual ~ObArrayTypeCastFactory() {}; + static int alloc(common::ObIAllocator &alloc, const ObCollectionTypeBase &src_array_meta, + const ObCollectionTypeBase &dst_array_meta, ObArrayTypeCast *&arr_cast); +private: + DISALLOW_COPY_AND_ASSIGN(ObArrayTypeCastFactory); +}; + +} // namespace sql +} // namespace oceanbase +#endif // OCEANBASE_OB_ARRAY_CAST_ diff --git a/src/sql/engine/expr/ob_array_expr_utils.cpp b/src/sql/engine/expr/ob_array_expr_utils.cpp new file mode 100644 index 0000000000..35ac8cefcd --- /dev/null +++ b/src/sql/engine/expr/ob_array_expr_utils.cpp @@ -0,0 +1,1189 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains implementation for ob_array_expr_utils. 
+ */ + +#define USING_LOG_PREFIX SQL_ENG +#include "sql/engine/expr/ob_array_expr_utils.h" +#include "sql/engine/expr/ob_expr_result_type_util.h" + +using namespace oceanbase::common; +namespace oceanbase +{ +namespace sql +{ + +const char* ObArrayExprUtils::DEFAULT_CAST_TYPE_NAME = "ARRAY(FLOAT)"; +const ObString ObArrayExprUtils::DEFAULT_CAST_TYPE_STR = ObString::make_string(DEFAULT_CAST_TYPE_NAME); + +int ObArrayExprUtils::get_type_vector( + const ObExpr &expr, + ObEvalCtx &ctx, + ObIAllocator &allocator, + ObIArrayType *&result, + bool &is_null) +{ + int ret = OB_SUCCESS; + ObDatum *datum = NULL; + if (OB_FAIL(expr.eval(ctx, datum))) { + LOG_WARN("eval failed", K(ret)); + } else if (OB_UNLIKELY(datum->is_null())) { + is_null = true; + } else if (OB_FAIL(get_type_vector(expr, *datum, ctx, allocator, result))) { + LOG_WARN("failed to get vector", K(ret)); + } + return ret; +} + +// get vector or array(float) +int ObArrayExprUtils::get_type_vector( + const ObExpr &expr, + const ObDatum &datum, + ObEvalCtx &ctx, + ObIAllocator &allocator, + ObIArrayType *&result) +{ + int ret = OB_SUCCESS; + ObSubSchemaValue value; + uint16_t subschema_id = expr.obj_meta_.get_subschema_id(); + if (!expr.obj_meta_.is_collection_sql_type()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support", K(ret), K(expr.obj_meta_)); + } else if (OB_FAIL(ctx.exec_ctx_.get_sqludt_meta_by_subschema_id(subschema_id, value))) { + LOG_WARN("failed to get subschema ctx", K(ret)); + } else if (value.type_ >= OB_SUBSCHEMA_MAX_TYPE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid subschema type", K(ret), K(value)); + } else { + ObString blob_data = datum.get_string(); + const ObSqlCollectionInfo *coll_info = reinterpret_cast(value.value_); + ObCollectionArrayType *arr_type = static_cast(coll_info->collection_meta_); + if (OB_FAIL(ObTextStringHelper::read_real_string_data(&allocator, + ObLongTextType, + CS_TYPE_BINARY, + true, + blob_data))) { + LOG_WARN("fail to get real data.", K(ret), K(blob_data)); + 
} else if (OB_FAIL(ObArrayTypeObjFactory::construct(allocator, *arr_type, result, true))) { + LOG_WARN("construct array obj failed", K(ret), K(*coll_info)); + } else if (OB_FAIL(result->init(blob_data))) { + LOG_WARN("failed to init array", K(ret)); + } + } + return ret; +} + +int ObArrayExprUtils::vector_datum_add(ObDatum &res, const ObDatum &data, ObIAllocator &allocator, bool negative) +{ + int ret = OB_SUCCESS; + ObString blob_res = res.get_string(); + ObString blob_data = data.get_string(); + if (OB_FAIL(ObTextStringHelper::read_real_string_data(&allocator, + ObLongTextType, + CS_TYPE_BINARY, + true, + blob_data))) { + LOG_WARN("fail to get real data.", K(ret), K(blob_data)); + } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(&allocator, + ObLongTextType, + CS_TYPE_BINARY, + true, + blob_res))) { + LOG_WARN("fail to get real data.", K(ret), K(blob_data)); + } else { + int64_t length = blob_data.length() / sizeof(float); + float *float_data = reinterpret_cast(blob_data.ptr()); + float *float_res = reinterpret_cast(blob_res.ptr()); + for (int64_t i = 0; OB_SUCC(ret) && i < length; ++i) { + negative ? float_res[i] -= float_data[i] : float_res[i] += float_data[i]; + if (isinff(float_res[i]) != 0) { + ret = OB_OPERATE_OVERFLOW; + SQL_LOG(WARN, "value overflow", K(ret), K(i), K(float_data[i]), K(float_res[i])); + } + } + } + return ret; +} + +// cast any array and varchar to array(float) +int ObArrayExprUtils::calc_cast_type( + ObExprResType &type, + common::ObExprTypeCtx &type_ctx, + const bool only_vector) +{ + int ret = OB_SUCCESS; + ObSQLSessionInfo *session = const_cast(type_ctx.get_session()); + ObExecContext *exec_ctx = OB_ISNULL(session) ? 
NULL : session->get_cur_exec_ctx(); + uint16_t dst_subschema_id = 0; + bool need_cast = false; + if (!type.is_collection_sql_type() && !type.is_string_type() && !type.is_null()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(type)); + } else if (type.is_collection_sql_type()) { + ObSubSchemaValue value; + uint16_t src_subschema_id = type.get_subschema_id(); + if (OB_FAIL(exec_ctx->get_sqludt_meta_by_subschema_id(src_subschema_id, value))) { + LOG_WARN("failed to get subschema ctx", K(ret)); + } else if (value.type_ >= OB_SUBSCHEMA_MAX_TYPE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid subschema type", K(ret), K(value)); + } else { + const ObSqlCollectionInfo *coll_info = NULL; + coll_info = reinterpret_cast(value.value_); + if (coll_info->collection_meta_->type_id_ == ObNestedType::OB_ARRAY_TYPE) { + ObCollectionArrayType *arr_type = static_cast(coll_info->collection_meta_); + if (only_vector) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("only support vector type", K(ret)); + } else if (arr_type->element_type_->type_id_ != ObNestedType::OB_BASIC_TYPE) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("nested array is not support", K(ret)); + } else { + ObCollectionBasicType *elem_type = static_cast(arr_type->element_type_); + if (ObFloatType != elem_type->basic_meta_.get_obj_type()) { + need_cast = true; + } + } + } + // vector and array(float) don't need to cast + if (OB_SUCC(ret) && !need_cast) { + type.set_calc_type(ObCollectionSQLType); + type.set_calc_subschema_id(src_subschema_id); // avoid cast by set the same subschema_id + } + } + } else if (type.is_string_type()) { + need_cast = true; + } + if (OB_FAIL(ret)) { + } else if (need_cast) { + if (OB_FAIL(exec_ctx->get_subschema_id_by_type_string(DEFAULT_CAST_TYPE_STR, dst_subschema_id))) { + LOG_WARN("failed to get subschema id by type string", K(ret), K(DEFAULT_CAST_TYPE_STR)); + } else { + type.set_calc_type(ObCollectionSQLType); + type.set_calc_subschema_id(dst_subschema_id); + } + } + 
+ return ret; +} + +int ObArrayExprUtils::collect_vector_cast_info(ObExprResType &type, ObExecContext &exec_ctx, ObVectorCastInfo &info) +{ + int ret = OB_SUCCESS; + if (type.is_collection_sql_type()) { + ObSubSchemaValue value; + info.subschema_id_ = type.get_subschema_id(); + if (OB_FAIL(exec_ctx.get_sqludt_meta_by_subschema_id(info.subschema_id_, value))) { + LOG_WARN("failed to get subschema ctx", K(ret)); + } else if (value.type_ >= OB_SUBSCHEMA_MAX_TYPE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid subschema type", K(ret), K(value)); + } else { + const ObSqlCollectionInfo *coll_info = NULL; + coll_info = reinterpret_cast(value.value_); + if (coll_info->collection_meta_->type_id_ == ObNestedType::OB_VECTOR_TYPE) { + ObCollectionArrayType *arr_type = static_cast(coll_info->collection_meta_); + info.is_vector_ = true; + info.dim_cnt_ = arr_type->dim_cnt_; + } else if (coll_info->collection_meta_->type_id_ == ObNestedType::OB_ARRAY_TYPE) { + ObCollectionArrayType *arr_type = static_cast(coll_info->collection_meta_); + ObCollectionBasicType *elem_type = static_cast(arr_type->element_type_); + if (ObFloatType != elem_type->basic_meta_.get_obj_type()) { + info.need_cast_ = true; + } + } + } + } else if (type.is_string_type()) { + info.need_cast_ = true; + } else if (!type.is_null()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(type)); + } + return ret; +} + +int ObArrayExprUtils::calc_cast_type2( + ObExprResType &type1, + ObExprResType &type2, + common::ObExprTypeCtx &type_ctx, + uint16_t &res_subschema_id, + const bool only_vector) +{ + int ret = OB_SUCCESS; + res_subschema_id = UINT16_MAX; + ObSQLSessionInfo *session = const_cast(type_ctx.get_session()); + ObExecContext *exec_ctx = OB_ISNULL(session) ? 
NULL : session->get_cur_exec_ctx(); + ObString default_dst_type("ARRAY(FLOAT)"); + uint16_t default_dst_subschema_id = UINT16_MAX; + + ObVectorCastInfo info1; + ObVectorCastInfo info2; + if (OB_ISNULL(exec_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("exec ctx is null", K(ret)); + } else if (OB_FAIL(collect_vector_cast_info(type1, *exec_ctx, info1))) { + LOG_WARN("failed to collect vector cast info", K(ret)); + } else if (OB_FAIL(collect_vector_cast_info(type2, *exec_ctx, info2))) { + LOG_WARN("failed to collect vector cast info", K(ret)); + } else if (info1.is_vector_ && info2.is_vector_) { + if (info1.dim_cnt_ != info2.dim_cnt_) { + ret = OB_ERR_INVALID_VECTOR_DIM; + LOG_WARN("check array validty failed", K(ret), K(info1.dim_cnt_), K(info2.dim_cnt_)); + } + } else if (info1.is_vector_) { + if (!type2.is_null()) { + type2.set_calc_type(ObCollectionSQLType); + type2.set_calc_subschema_id(info1.subschema_id_); + info2.need_cast_ = true; + } + res_subschema_id = info1.subschema_id_; + } else if (info2.is_vector_) { + if (!type1.is_null()) { + type1.set_calc_type(ObCollectionSQLType); + type1.set_calc_subschema_id(info2.subschema_id_); + info1.need_cast_ = true; + } + res_subschema_id = info2.subschema_id_; + } else if (only_vector) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("no vector in the expr", K(ret)); + } else if (info1.need_cast_ || info2.need_cast_) { + if (OB_FAIL(exec_ctx->get_subschema_id_by_type_string(default_dst_type, default_dst_subschema_id))) { + LOG_WARN("failed to get subschema id by type string", K(ret), K(default_dst_type)); + } else { + if (info1.need_cast_) { + type1.set_calc_type(ObCollectionSQLType); + type1.set_calc_subschema_id(default_dst_subschema_id); + } + if (info2.need_cast_) { + type2.set_calc_type(ObCollectionSQLType); + type2.set_calc_subschema_id(default_dst_subschema_id); + } + res_subschema_id = default_dst_subschema_id; + } + } + if (OB_SUCC(ret)) { + if (type1.is_collection_sql_type() && !info1.need_cast_) { + 
type1.set_calc_type(ObCollectionSQLType); + type1.set_calc_subschema_id(type1.get_subschema_id()); // avoid cast by set the same subschema_id + res_subschema_id = type1.get_subschema_id(); + } + if (type2.is_collection_sql_type() && !info2.need_cast_) { + type2.set_calc_type(ObCollectionSQLType); + type2.set_calc_subschema_id(type2.get_subschema_id()); // avoid cast by set the same subschema_id + res_subschema_id = type2.get_subschema_id(); + } + } + return ret; +} + +int ObArrayExprUtils::set_array_res(ObIArrayType *arr_obj, const int32_t res_size, const ObExpr &expr, ObEvalCtx &ctx, ObString &res, const char *data) +{ + int ret = OB_SUCCESS; + char *res_buf = nullptr; + int64_t res_buf_len = 0; + ObDatum tmp_res; + ObTextStringDatumResult str_result(expr.datum_meta_.type_, &expr, &ctx, &tmp_res); + if (OB_FAIL(str_result.init(res_size, nullptr))) { + LOG_WARN("fail to init result", K(ret), K(res_size)); + } else if (OB_FAIL(str_result.get_reserved_buffer(res_buf, res_buf_len))) { + LOG_WARN("fail to get reserver buffer", K(ret)); + } else if (res_buf_len < res_size) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid res buf len", K(ret), K(res_buf_len), K(res_size)); + } else if (nullptr != data) { + MEMCPY(res_buf, data, res_size); + } else if (nullptr != arr_obj && OB_FAIL(arr_obj->get_raw_binary(res_buf, res_buf_len))) { + LOG_WARN("get array raw binary failed", K(ret), K(res_buf_len), K(res_size)); + } + if (FAILEDx(str_result.lseek(res_size, 0))) { + LOG_WARN("failed to lseek res.", K(ret), K(str_result), K(res_size)); + } else { + str_result.get_result_buffer(res); + } + return ret; +} + +int ObArrayExprUtils::set_array_res(ObIArrayType *arr_obj, const int32_t res_size, ObIAllocator &allocator, ObString &res, const char *data) +{ + int ret = OB_SUCCESS; + bool has_lob_header = !IS_CLUSTER_VERSION_BEFORE_4_1_0_0; + char *res_buf = nullptr; + int64_t res_buf_len = 0; + ObDatum tmp_res; + ObTextStringDatumResult str_result(ObCollectionSQLType, 
has_lob_header, &tmp_res); + if (OB_FAIL(str_result.init(res_size, &allocator))) { + LOG_WARN("fail to init result", K(ret), K(res_size)); + } else if (OB_FAIL(str_result.get_reserved_buffer(res_buf, res_buf_len))) { + LOG_WARN("fail to get reserver buffer", K(ret)); + } else if (res_buf_len < res_size) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid res buf len", K(ret), K(res_buf_len), K(res_size)); + } else if (nullptr != data) { + MEMCPY(res_buf, data, res_size); + } else if (nullptr != arr_obj && OB_FAIL(arr_obj->get_raw_binary(res_buf, res_buf_len))) { + LOG_WARN("get array raw binary failed", K(ret), K(res_buf_len), K(res_size)); + } + if (FAILEDx(str_result.lseek(res_size, 0))) { + LOG_WARN("failed to lseek res.", K(ret), K(str_result), K(res_size)); + } else { + str_result.get_result_buffer(res); + } + return ret; +} + +template +int ObArrayExprUtils::set_array_res(ObIArrayType *arr_obj, const ObExpr &expr, ObEvalCtx &ctx, + ResVec *res_vec, int64_t batch_idx) +{ + int ret = OB_SUCCESS; + int32_t res_size = arr_obj->get_raw_binary_len(); + char *res_buf = nullptr; + int64_t res_buf_len = 0; + ObTextStringVectorResult str_result(expr.datum_meta_.type_, &expr, &ctx, res_vec, batch_idx); + if (OB_FAIL(str_result.init_with_batch_idx(res_size, batch_idx))) { + LOG_WARN("fail to init result", K(ret), K(res_size)); + } else if (OB_FAIL(str_result.get_reserved_buffer(res_buf, res_buf_len))) { + LOG_WARN("fail to get reserver buffer", K(ret)); + } else if (res_buf_len < res_size) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid res buf len", K(ret), K(res_buf_len), K(res_size)); + } else if (OB_FAIL(arr_obj->get_raw_binary(res_buf, res_buf_len))) { + LOG_WARN("get array raw binary failed", K(ret), K(res_buf_len), K(res_size)); + } else if (OB_FAIL(str_result.lseek(res_size, 0))) { + LOG_WARN("failed to lseek res.", K(ret), K(str_result), K(res_size)); + } else { + str_result.set_result(); + } + return ret; +} + +template int 
ObArrayExprUtils::set_array_res>( + ObIArrayType* arr_obj, const ObExpr& expr, ObEvalCtx& ctx, + ObUniformFormat* res_vec, int64_t batch_idx); + +int ObArrayExprUtils::set_array_obj_res(ObIArrayType *arr_obj, ObObjCastParams *params, ObObj *obj) +{ + int ret = OB_SUCCESS; + bool has_lob_header = !IS_CLUSTER_VERSION_BEFORE_4_1_0_0; + int32_t res_size = arr_obj->get_raw_binary_len(); + char *res_buf = nullptr; + int64_t res_buf_len = 0; + sql::ObTextStringObObjResult text_result(ObCollectionSQLType, params, obj, has_lob_header); + if (OB_FAIL(text_result.init(res_size, params->allocator_v2_))) { + LOG_WARN("init lob result failed"); + } else if (OB_FAIL(text_result.get_reserved_buffer(res_buf, res_buf_len))) { + LOG_WARN("fail to get reserver buffer", K(ret)); + } else if (res_buf_len < res_size) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid res buf len", K(ret), K(res_buf_len), K(res_size)); + } else if (OB_FAIL(arr_obj->get_raw_binary(res_buf, res_buf_len))) { + LOG_WARN("get array raw binary failed", K(ret), K(res_buf_len), K(res_size)); + } else if (OB_FAIL(text_result.lseek(res_size, 0))) { + LOG_WARN("failed to lseek res.", K(ret), K(text_result), K(res_size)); + } else { + text_result.set_result(); + } + return ret; +} + +int ObArrayExprUtils::check_array_type_compatibility(ObExecContext *exec_ctx, uint16_t l_subid, uint16_t r_subid, bool &is_compatiable) +{ + int ret = OB_SUCCESS; + ObSubSchemaValue l_meta; + ObSubSchemaValue r_meta; + if (OB_FAIL(exec_ctx->get_sqludt_meta_by_subschema_id(l_subid, l_meta))) { + LOG_WARN("failed to get elem meta.", K(ret), K(l_subid)); + } else if (OB_FAIL(exec_ctx->get_sqludt_meta_by_subschema_id(r_subid, r_meta))) { + LOG_WARN("failed to get elem meta.", K(ret), K(l_subid)); + } else if (l_meta.type_ != ObSubSchemaType::OB_SUBSCHEMA_COLLECTION_TYPE + || r_meta.type_ != ObSubSchemaType::OB_SUBSCHEMA_COLLECTION_TYPE) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("invalid subschema type", K(ret), K(l_meta.type_), 
K(r_meta.type_)); + } else if (OB_ISNULL(l_meta.value_) || OB_ISNULL(r_meta.value_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("type info is null", K(ret), K(l_meta.value_), K(r_meta.value_)); + } else { + is_compatiable = + reinterpret_cast(l_meta.value_)->has_same_super_type(*reinterpret_cast(r_meta.value_)); + } + return ret; +} + +int ObArrayExprUtils::get_array_element_type(ObExecContext *exec_ctx, uint16_t subid, ObDataType &elem_type, + uint32_t &depth, bool &is_vec) +{ + int ret = OB_SUCCESS; + ObSubSchemaValue meta; + if (OB_FAIL(exec_ctx->get_sqludt_meta_by_subschema_id(subid, meta))) { + LOG_WARN("failed to get elem meta.", K(ret), K(subid)); + } else if (meta.type_ != ObSubSchemaType::OB_SUBSCHEMA_COLLECTION_TYPE) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("invalid subschema type", K(ret), K(meta.type_)); + } else if (OB_ISNULL(meta.value_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("type info is null", K(ret)); + } else { + const ObSqlCollectionInfo * coll_info = reinterpret_cast(meta.value_); + elem_type = coll_info->get_basic_meta(depth); + is_vec = coll_info->collection_meta_->type_id_ == ObNestedType::OB_VECTOR_TYPE; + } + return ret; +} + +int ObArrayExprUtils::get_array_element_type(ObExecContext *exec_ctx, uint16_t subid, ObObjType &obj_type, + uint32_t &depth, bool &is_vec) +{ + int ret = OB_SUCCESS; + ObDataType elem_type; + if (OB_FAIL(get_array_element_type(exec_ctx, subid, elem_type, depth, is_vec))) { + LOG_WARN("failed to get elem meta.", K(ret), K(subid)); + } else { + obj_type = elem_type.get_obj_type(); + } + return ret; +} + +int ObArrayExprUtils::deduce_array_element_type(ObExecContext *exec_ctx, ObExprResType* types_stack, int64_t param_num, ObDataType &elem_type) +{ + int ret = OB_SUCCESS; + uint16_t last_subschema_id = ObInvalidSqlType; + ObExprResType coll_calc_type; + ObLength str_len = 0; + bool is_decimal_exist = false; + bool is_bigint_signed_exsit = false; + bool is_all_num_tc = true; + bool is_all_same_type = true; + 
ObObjType last_type = ObNullType; + int64_t elem_idx = 0; + ObDecimalIntWideType dec_type = DECIMAL_INT_32; + for (int64_t i = 0; i < param_num && OB_SUCC(ret); i++) { + if (!types_stack[i].is_null()) { + elem_idx = i; + } + if (types_stack[i].get_type() == ObDecimalIntType) { + is_decimal_exist = true; + dec_type = MAX(dec_type, get_decimalint_type(types_stack[i].get_precision())); + } + if (!ob_is_numeric_tc(types_stack[i].get_type_class()) && !types_stack[i].is_null()) { + is_all_num_tc = false; + } + if (types_stack[i].get_type() == ObIntType) { + is_bigint_signed_exsit = true; + } + if (ob_is_collection_sql_type(types_stack[i].get_type())) { + // check subschmea id + if (last_subschema_id == ObInvalidSqlType) { + coll_calc_type = types_stack[i]; + last_subschema_id = types_stack[i].get_subschema_id(); + elem_type.meta_.set_collection(last_subschema_id); + } else if (last_subschema_id != types_stack[i].get_subschema_id()) { + ObExprResType tmp_calc_type; + if (OB_FAIL(ObExprResultTypeUtil::get_array_calc_type(exec_ctx, coll_calc_type, types_stack[i], tmp_calc_type))) { + LOG_WARN("failed to check array compatibilty", K(ret)); + } else { + last_subschema_id = tmp_calc_type.get_subschema_id(); + coll_calc_type = tmp_calc_type; + elem_type.meta_.set_collection(last_subschema_id); + } + } + } else if (i == 0) { + // do nothing + } else if (types_stack[i - 1].get_type() != types_stack[i].get_type() + && !types_stack[i].is_null() && !types_stack[i - 1].is_null()) { // null is legal input type + is_all_same_type = false; + if (!is_all_num_tc) { + ret = OB_ERR_ILLEGAL_ARGUMENT_FOR_FUNCTION; + LOG_USER_ERROR(OB_ERR_ILLEGAL_ARGUMENT_FOR_FUNCTION); + } + } + if (OB_SUCC(ret)) { + if (ob_is_string_tc(types_stack[i].get_type())) { + str_len = MAX(str_len, types_stack[i].get_length()); + } + } + } + if (OB_SUCC(ret) && last_subschema_id == ObInvalidSqlType) { + if (is_all_same_type && !is_decimal_exist) { + if (param_num == 0) { + // default type + 
elem_type.meta_.set_tinyint(); + } else { + elem_type.set_meta_type(types_stack[elem_idx].get_obj_meta()); + elem_type.set_accuracy(types_stack[elem_idx].get_accuracy()); + if (ob_is_string_tc(types_stack[elem_idx].get_type())) { + elem_type.set_length(str_len); + for (int64_t i = 0; i < param_num; i++) { + if (!types_stack[i].is_null()) { + if (types_stack[i].get_length() != str_len) { + types_stack[i].set_calc_length(str_len); + } + } + } + } + } + } else if (is_decimal_exist) { + elem_type.meta_.set_double(); + for (int64_t i = 0; i < param_num; i++) { + if (!types_stack[i].is_null()) { + if (types_stack[i].get_type() != ObDoubleType) { + types_stack[i].set_calc_type(ObDoubleType); + } + } + } + } else if (is_all_num_tc) { + ObObjType calc_type = ObUInt64Type; + if (is_bigint_signed_exsit) { + elem_type.meta_.set_int(); + calc_type = ObIntType; + } else { + elem_type.meta_.set_uint64(); + } + for (int64_t i = 0; i < param_num; i++) { + if (!types_stack[i].is_null()) { + if (types_stack[i].get_type() != calc_type) { + types_stack[i].set_calc_type(calc_type); + } + } + } + } + } + + return ret; +}
+
+// Resolve the subschema id of the nested type "ARRAY(<element type name>)".
+//
+// @param exec_ctx      execution context used to look up / register subschema ids
+// @param elem_type     element type; its meta carries the element's subschema id
+// @param subschema_id  [out] subschema id of the wrapping ARRAY(...) type
+// @return OB_SUCCESS, OB_ERR_INVALID_TYPE_FOR_OP when the element is not a collection,
+//         OB_ERR_UNEXPECTED when the element has no type info, OB_BUF_NOT_ENOUGH when
+//         the composed type string does not fit the local buffer, or the error of a callee.
+int ObArrayExprUtils::deduce_nested_array_subschema_id(ObExecContext *exec_ctx, ObDataType &elem_type, uint16_t &subschema_id)
+{
+  int ret = OB_SUCCESS;
+  uint16_t elem_subid = elem_type.meta_.get_subschema_id();
+  ObSubSchemaValue elem_meta;
+  if (OB_FAIL(exec_ctx->get_sqludt_meta_by_subschema_id(elem_subid, elem_meta))) {
+    LOG_WARN("failed to get elem meta.", K(ret), K(elem_subid));
+  } else if (elem_meta.type_ != ObSubSchemaType::OB_SUBSCHEMA_COLLECTION_TYPE) {
+    ret = OB_ERR_INVALID_TYPE_FOR_OP;
+    LOG_WARN("invalid subschema type", K(ret), K(elem_meta.type_));
+  } else if (OB_ISNULL(elem_meta.value_)) {
+    // the type name is read through this pointer below
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("type info is null", K(ret), K(elem_subid));
+  } else {
+    const int MAX_LEN = 256;
+    int64_t pos = 0;
+    char tmp[MAX_LEN] = {0};
+    ObString type_info;
+    const ObSqlCollectionInfo *coll_info = reinterpret_cast<const ObSqlCollectionInfo *>(elem_meta.value_);
+    if (OB_FAIL(databuff_printf(tmp, MAX_LEN, pos, "ARRAY("))) {
+      LOG_WARN("failed to convert len to string", K(ret));
+    } else if (pos + static_cast<int64_t>(coll_info->name_len_) >= MAX_LEN) {
+      // the raw copy below is not bounds-checked, so reject names that would overrun tmp
+      ret = OB_BUF_NOT_ENOUGH;
+      LOG_WARN("type name is too long", K(ret), K(pos), K(coll_info->name_len_));
+    } else if (FALSE_IT(STRNCPY(tmp + pos, coll_info->name_def_, coll_info->name_len_))) {
+    } else if (FALSE_IT(pos += coll_info->name_len_)) {
+    } else if (OB_FAIL(databuff_printf(tmp, MAX_LEN, pos, ")"))) {
+      LOG_WARN("failed to add ) to string", K(ret));
+    } else if (FALSE_IT(type_info.assign_ptr(tmp, static_cast<int32_t>(pos)))) {
+    } else if (OB_FAIL(exec_ctx->get_subschema_id_by_type_string(type_info, subschema_id))) {
+      LOG_WARN("failed get subschema id", K(ret), K(type_info));
+    }
+  }
+  return ret;
+}
+
+int ObVectorVectorArithFunc::operator()(ObDatum &res, const ObDatum &l, const ObDatum &r, const ObExpr &expr, ObEvalCtx &ctx, ArithType type) const +{ + int ret = OB_SUCCESS; + const ObExpr &left_expr = *expr.args_[0]; + const ObExpr &right_expr = *expr.args_[1]; + ObIArrayType *arr_l = NULL; + ObIArrayType *arr_r = NULL; + ObIArrayType *arr_res = NULL; + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); + ObSubSchemaValue value; + uint16_t subschema_id = expr.obj_meta_.get_subschema_id(); + const ObSqlCollectionInfo *coll_info = NULL; + ObCollectionArrayType *arr_type = NULL; + if (OB_FAIL(ctx.exec_ctx_.get_sqludt_meta_by_subschema_id(subschema_id, value))) { + LOG_WARN("failed to get subschema ctx", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::get_type_vector(left_expr, l, ctx, tmp_allocator, arr_l))) { + LOG_WARN("failed to get vector", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::get_type_vector(right_expr, r, ctx, tmp_allocator, arr_r))) { + LOG_WARN("failed to get vector", K(ret)); + } else if (OB_ISNULL(arr_l) || OB_ISNULL(arr_r)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), K(arr_l), K(arr_r)); + } else if (OB_UNLIKELY(arr_l->size() != arr_r->size())) { + ret = OB_ERR_INVALID_VECTOR_DIM; + LOG_WARN("check array validty failed", K(ret), K(arr_l->size()), K(arr_r->size())); + } else if (arr_l->contain_null() || arr_r->contain_null()) { + ret = OB_ERR_NULL_VALUE; +
LOG_WARN("array with null can't add", K(ret)); + } else if (FALSE_IT(coll_info = reinterpret_cast(value.value_))) { + } else if (OB_ISNULL(coll_info)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("collect info is null", K(ret), K(subschema_id)); + } else if (OB_ISNULL(arr_type = static_cast(coll_info->collection_meta_))) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("array type is null", K(ret), K(subschema_id)); + } else if (OB_FAIL(ObArrayTypeObjFactory::construct(tmp_allocator, *arr_type, arr_res))) { + LOG_WARN("construct array obj failed", K(ret), K(subschema_id), K(coll_info)); + } else { + const float *data_l = reinterpret_cast(arr_l->get_data()); + const float *data_r = reinterpret_cast(arr_r->get_data()); + const uint32_t size = arr_l->size(); + ObArrayFixedSize *float_array = static_cast *>(arr_res); + for (int64_t i = 0; OB_SUCC(ret) && i < size; ++i) { + const float float_res = type == ADD ? data_l[i] + data_r[i] : + type == MUL ? data_l[i] * data_r[i] : + data_l[i] - data_r[i]; + if (isinff(float_res) != 0) { + ret = OB_OPERATE_OVERFLOW; + LOG_WARN("value overflow", K(ret), K(i), K(data_l[i]), K(data_r[i])); + } else if (OB_FAIL(float_array->push_back(float_res))) { + LOG_WARN("failed to push back value", K(ret), K(float_res)); + } + } + ObString res_str; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObArrayExprUtils::set_array_res(arr_res, + arr_res->get_raw_binary_len(), + ctx.get_expr_res_alloc(), + res_str))) { + LOG_WARN("get array binary string failed", K(ret), K(*coll_info)); + // FIXME huhaosheng.hhs: maybe set batch_idx_ before in order to use frame res_buf + // } else if (OB_FAIL(ObArrayExprUtils::set_array_res(arr_res, expr, ctx, res_str))) { + + // LOG_WARN("get array binary string failed", K(ret), K(*coll_info)); + } else { + res.set_string(res_str); + } + } + return ret; +} + +int ObArrayExprUtils::dispatch_array_attrs(ObEvalCtx &ctx, ObIArrayType *arr_obj, ObExpr **attrs, uint32_t attr_count, const int64_t row_idx) +{ + int ret = OB_SUCCESS; + 
ObArrayAttr arr_attrs[attr_count]; + uint32_t attr_idx = 0; + if (OB_FAIL(arr_obj->flatten(arr_attrs, attr_count, attr_idx))) { + LOG_WARN("array flatten failed", K(ret)); + } else { + for (uint32_t i = 0; i < attr_count; i++) { + ObIVector *vec = attrs[i]->get_vector(ctx); + if (i == 0) { + vec->set_int(row_idx, arr_obj->size()); + } else if (arr_attrs[i - 1].ptr_ == NULL && arr_attrs[i - 1].length_ == 0) { + vec->set_payload_shallow(row_idx, NULL, 0); // get ride of random values + vec->set_null(row_idx); + } else { + const char *payload = arr_attrs[i - 1].ptr_; + uint32_t len = arr_attrs[i - 1].length_; + vec->set_payload_shallow(row_idx, payload, len); + } + } + } + return ret; +}
+
+// Build an array object from the serialized bytes in array_data and hand its attributes to
+// expr's attribute expressions at row_idx (via the ObIArrayType overload of this function).
+//
+// @param ctx         evaluation context; also supplies the temporary allocator
+// @param expr        array-typed expression whose attrs_ receive the data
+// @param array_data  serialized array; when it starts with a valid LOB header it is first
+//                    passed to read_real_string_data(), which receives it by reference
+// @param row_idx     row to write in each attribute vector
+// @return OB_SUCCESS, OB_ERR_NULL_VALUE when the collection info or its array type is
+//         missing, or the error of a callee.
+int ObArrayExprUtils::dispatch_array_attrs(ObEvalCtx &ctx, ObExpr &expr, ObString &array_data, const int64_t row_idx)
+{
+  int ret = OB_SUCCESS;
+  ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx);
+  // to fix : outrow lob can't use tmp allocator
+  common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator();
+  ObSubSchemaValue value;
+  uint16_t subschema_id = expr.obj_meta_.get_subschema_id();
+  const ObSqlCollectionInfo *coll_info = NULL;
+  if (OB_FAIL(ctx.exec_ctx_.get_sqludt_meta_by_subschema_id(subschema_id, value))) {
+    LOG_WARN("failed to get subschema ctx", K(ret));
+  } else {
+    ObLobCommon *lob_comm = (ObLobCommon*)(array_data.ptr());
+    if (lob_comm->is_valid()
+        && OB_FAIL(ObTextStringHelper::read_real_string_data(&tmp_allocator,
+                                                             ObLongTextType,
+                                                             CS_TYPE_BINARY,
+                                                             true,
+                                                             array_data))) {
+      LOG_WARN("fail to get real data.", K(ret), K(array_data));
+    } else {
+      ObIArrayType *arr_obj = NULL;
+      ObCollectionArrayType *arr_type = NULL;
+      coll_info = reinterpret_cast<const ObSqlCollectionInfo *>(value.value_);
+      // check coll_info before reading collection_meta_ through it
+      if (OB_ISNULL(coll_info)) {
+        ret = OB_ERR_NULL_VALUE;
+        LOG_WARN("collect info is null", K(ret), K(subschema_id));
+      } else if (OB_ISNULL(arr_type = static_cast<ObCollectionArrayType *>(coll_info->collection_meta_))) {
+        ret = OB_ERR_NULL_VALUE;
+        LOG_WARN("array type is null", K(ret), K(subschema_id));
+      } else if (OB_FAIL(ObArrayTypeObjFactory::construct(tmp_allocator, *arr_type, arr_obj, true))) {
+        LOG_WARN("construct array obj failed", K(ret), K(subschema_id), K(coll_info));
+      } else if (OB_FAIL(arr_obj->init(array_data))) {
+        LOG_WARN("init array obj failed", K(ret), K(subschema_id), K(coll_info));
+      } else if (OB_FAIL(dispatch_array_attrs(ctx, arr_obj, expr.attrs_, expr.attrs_cnt_, row_idx))) {
+        LOG_WARN("dispatch array attributes failed", K(ret), K(subschema_id), K(coll_info));
+      }
+    }
+  }
+  return ret;
+}
+
+int ObVectorFloatArithFunc::operator()(ObDatum &res, const ObDatum &l, const ObDatum &r, const ObExpr &expr, ObEvalCtx &ctx, ArithType type) const +{ + UNUSED(type); + int ret = OB_SUCCESS; + const ObExpr &left_expr = *expr.args_[0]; + const ObExpr &right_expr = *expr.args_[1]; + ObIArrayType *arr_l = NULL; + float data_r = r.get_float(); + ObIArrayType *arr_res = NULL; + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); + ObSubSchemaValue value; + uint16_t subschema_id = expr.obj_meta_.get_subschema_id(); + const ObSqlCollectionInfo *coll_info = NULL; + ObCollectionArrayType *arr_type = NULL; + if (0 == data_r) { + res.set_null(); + } else if (OB_FAIL(ctx.exec_ctx_.get_sqludt_meta_by_subschema_id(subschema_id, value))) { + LOG_WARN("failed to get subschema ctx", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::get_type_vector(left_expr, l, ctx, tmp_allocator, arr_l))) { + LOG_WARN("failed to get vector", K(ret)); + } else if (OB_ISNULL(arr_l)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), K(arr_l)); + } else if (arr_l->contain_null()) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("array with null can't add", K(ret)); + } else if (FALSE_IT(coll_info = reinterpret_cast(value.value_))) { + } else if (OB_ISNULL(coll_info)) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("collect info is null", K(ret), K(subschema_id)); + } else if (OB_ISNULL(arr_type = static_cast(coll_info->collection_meta_))) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("array type is null", K(ret), K(subschema_id)); + } else if
(OB_FAIL(ObArrayTypeObjFactory::construct(tmp_allocator, *arr_type, arr_res))) { + LOG_WARN("construct array obj failed", K(ret), K(subschema_id), K(coll_info)); + } else { + const float *data_l = reinterpret_cast(arr_l->get_data()); + const uint32_t size = arr_l->size(); + ObVectorData *float_array = static_cast(arr_res); + for (int64_t i = 0; OB_SUCC(ret) && i < size; ++i) { + const float float_res = data_l[i] / data_r; // only support div now + if (isinff(float_res) != 0) { + ret = OB_OPERATE_OVERFLOW; + LOG_WARN("value overflow", K(ret), K(i), K(data_l[i]), K(data_r)); + } else if (OB_FAIL(float_array->push_back(float_res))) { + LOG_WARN("failed to push back value", K(ret), K(float_res)); + } + } + ObString res_str; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObArrayExprUtils::set_array_res(arr_res, + arr_res->get_raw_binary_len(), + ctx.get_expr_res_alloc(), + res_str))) { + LOG_WARN("get array binary string failed", K(ret), K(*coll_info)); + } else { + res.set_string(res_str); + } + } + return ret; +}
+
+// For rows [begin, begin + batch_size) decode the serialized array held in expr's own vector
+// and scatter it into expr's attribute vectors.
+//
+// @param ctx         evaluation context; also supplies the temporary allocator
+// @param expr        array-typed expression (its vector holds the serialized arrays)
+// @param begin       first position to process
+// @param batch_size  number of positions to process
+// @param selector    optional position -> row mapping; when NULL the position is the row
+// @return OB_SUCCESS or the first error returned by a callee.
+int ObArrayExprUtils::batch_dispatch_array_attrs(ObEvalCtx &ctx, ObExpr &expr, int64_t begin, int64_t batch_size, const uint16_t *selector)
+{
+  int ret = OB_SUCCESS;
+  ObIVector *vec = expr.get_vector(ctx);
+  ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx);
+  ObIArrayType *arr_obj = NULL;
+  // to fix : outrow lob can't use tmp allocator
+  common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator();
+  if (OB_FAIL(construct_array_obj(tmp_allocator, ctx, expr.obj_meta_.get_subschema_id(), arr_obj))) {
+    LOG_WARN("fail to construct array obj.", K(ret));
+  } else {
+    for (int64_t row_idx = begin; row_idx < begin + batch_size && OB_SUCC(ret); row_idx++) {
+      // idx is the real row; row_idx is only the position inside the selector
+      int64_t idx = selector != NULL ? selector[row_idx] : row_idx;
+      ObString raw_data = vec->get_string(idx);
+      if (vec->is_null(idx)) {
+        for (uint32_t i = 0; i < expr.attrs_cnt_; i++) {
+          ObIVector *attr_vec = expr.attrs_[i]->get_vector(ctx);
+          // mark the same row that was tested above, as the non-null branch does
+          attr_vec->set_null(idx);
+        }
+      } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(&tmp_allocator,
+                                                                   ObLongTextType,
+                                                                   CS_TYPE_BINARY,
+                                                                   true,
+                                                                   raw_data))) {
+        LOG_WARN("fail to get real data.", K(ret), K(raw_data));
+      } else if (OB_FAIL(arr_obj->init(raw_data))) {
+        LOG_WARN("init array obj failed", K(ret));
+      } else if (OB_FAIL(dispatch_array_attrs_rows(ctx, arr_obj, idx, expr.attrs_, expr.attrs_cnt_))) {
+        LOG_WARN("failed to dispatch array attrs rows", K(ret));
+      }
+    }
+  }
+  return ret;
+}
+
+// Initialise arr_obj from the payloads stored at row_idx in expr's attribute vectors.
+//
+// @param ctx      evaluation context used to fetch the attribute vectors
+// @param expr     array-typed expression providing attrs_ / attrs_cnt_
+// @param row_idx  row to read from every attribute vector
+// @param arr_obj  array object to initialise; must not be NULL (it is dereferenced)
+// @return OB_SUCCESS or the error returned by arr_obj->init().
+int ObArrayExprUtils::assemble_array_attrs(ObEvalCtx &ctx, const ObExpr &expr, int64_t row_idx, ObIArrayType *arr_obj)
+{
+  int ret = OB_SUCCESS;
+  // runtime-sized local array: relies on the compiler's variable-length array extension
+  ObDatum attr_val[expr.attrs_cnt_];
+  for (uint32_t i = 0; i < expr.attrs_cnt_; ++i) {
+    ObIVector *vec = expr.attrs_[i]->get_vector(ctx);
+    const char *payload = NULL;
+    ObLength payload_len = 0;
+    vec->get_payload(row_idx, payload, payload_len);
+    attr_val[i].ptr_ = payload;
+    attr_val[i].pack_ = payload_len;
+  }
+  if (OB_FAIL(arr_obj->init(attr_val, expr.attrs_cnt_))) {
+    LOG_WARN("init array attrs failed", K(ret));
+  }
+  return ret;
+}
+
+// Convert an array expression whose vector is in VEC_DISCRETE or VEC_CONTINUOUS format to
+// VEC_UNIFORM: each non-skipped, non-null row is re-assembled from the attribute vectors and
+// written back as a serialized array.
+//
+// @param ctx         evaluation context
+// @param expr        array-typed expression to convert
+// @param batch_size  number of rows to convert
+// @param skip        optional bitmap of rows to leave untouched
+// @return OB_SUCCESS, OB_ERR_NULL_VALUE when the collection info, its array type or the null
+//         bitmap is unavailable (the bitmap is only fetched for the two formats above),
+//         OB_ERR_UNEXPECTED when expr has no attribute expressions, or the error of a callee.
+int ObArrayExprUtils::transform_array_to_uniform(ObEvalCtx &ctx, const ObExpr &expr, const int64_t batch_size, const ObBitVector *skip)
+{
+  int ret = OB_SUCCESS;
+  ObSubSchemaValue value;
+  uint16_t subschema_id = expr.obj_meta_.get_subschema_id();
+  const ObBitVector *nulls = NULL;
+  if (OB_FAIL(ctx.exec_ctx_.get_sqludt_meta_by_subschema_id(subschema_id, value))) {
+    LOG_WARN("failed to get subschema", K(ret));
+  } else if (expr.get_vector(ctx)->get_format() == VEC_DISCRETE
+             && FALSE_IT(nulls = static_cast<ObDiscreteBase *>(expr.get_vector(ctx))->get_nulls()) ) {
+  } else if (expr.get_vector(ctx)->get_format() == VEC_CONTINUOUS
+             && FALSE_IT(nulls = static_cast<ObContinuousBase *>(expr.get_vector(ctx))->get_nulls()) ) {
+  } else {
+    ObIArrayType *arr_obj = NULL;
+    ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx);
+    common::ObArenaAllocator &alloc = tmp_alloc_g.get_allocator();
+    const ObSqlCollectionInfo *coll_info = reinterpret_cast<const ObSqlCollectionInfo *>(value.value_);
+    ObCollectionArrayType *arr_type = NULL;
+    // check coll_info before reading collection_meta_ through it
+    if (OB_ISNULL(coll_info)) {
+      ret = OB_ERR_NULL_VALUE;
+      LOG_WARN("collect info is null", K(ret), K(subschema_id));
+    } else if (OB_ISNULL(arr_type = static_cast<ObCollectionArrayType *>(coll_info->collection_meta_))) {
+      ret = OB_ERR_NULL_VALUE;
+      LOG_WARN("array type is null", K(ret), K(subschema_id));
+    } else if (OB_ISNULL(nulls)) {
+      ret = OB_ERR_NULL_VALUE;
+      LOG_WARN("failed to get nulls", K(ret), K(expr.get_vector(ctx)->get_format()));
+    } else if (OB_FAIL(ObArrayTypeObjFactory::construct(alloc, *arr_type, arr_obj, true))) {
+      LOG_WARN("construct array obj failed", K(ret), K(subschema_id), K(coll_info));
+    } else if (OB_FAIL(expr.init_vector(ctx, VEC_UNIFORM, batch_size))) {
+      LOG_WARN("failed to init uniform vector", K(ret), K(batch_size));
+    } else {
+      UniformFormat *root_vec = static_cast<UniformFormat *>(expr.get_vector(ctx));
+      for (int64_t row_idx = 0; OB_SUCC(ret) && row_idx < batch_size; ++row_idx) {
+        if (expr.attrs_cnt_ <= 0) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("unexpected attrs cnt", K(ret), K(expr.attrs_cnt_));
+        } else if (skip != nullptr && skip->at(row_idx)) {
+          // skip row
+        } else if (nulls->at(row_idx) || expr.attrs_[0]->get_vector(ctx)->is_null(row_idx)) {
+          root_vec->set_null(row_idx);
+        } else if (OB_FAIL(assemble_array_attrs(ctx, expr, row_idx, arr_obj))) {
+          LOG_WARN("assemble array attrs failed", K(ret));
+        } else if (OB_FAIL(set_array_res(arr_obj, expr, ctx, root_vec, row_idx))) {
+          LOG_WARN("set array res failed", K(ret));
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+int ObArrayExprUtils::calc_nested_expr_data_size(const ObExpr &expr, ObEvalCtx &ctx, const int64_t batch_idx, int64_t &size) +{ + int ret = OB_SUCCESS; + size = 0; + for (uint32_t i = 0; i < expr.attrs_cnt_; ++i) { + ObIVector *vec = expr.attrs_[i]->get_vector(ctx); + VectorFormat format = vec->get_format(); + if (VEC_DISCRETE == format) { + ObDiscreteBase *disc_vec = static_cast(vec); + if
(!disc_vec->is_null(batch_idx)) { + ObLength *lens = disc_vec->get_lens(); + size += lens[batch_idx]; + } + } else if (VEC_CONTINUOUS == format) { + ObContinuousBase *cont_vec = static_cast(vec); + uint32_t *offsets = cont_vec->get_offsets(); + size += (offsets[batch_idx + 1] - offsets[batch_idx]); + } else if (is_uniform_format(format)) { + ObUniformBase *uni_vec = static_cast(vec); + ObDatum *datums = uni_vec->get_datums(); + const uint64_t idx_mask = VEC_UNIFORM_CONST == format ? 0 : UINT64_MAX; + size += datums[batch_idx & idx_mask].len_; + } else if (VEC_FIXED == format) { + // array len + size += sizeof(uint32_t); + } + } + ObTextStringResult blob_res(ObLongTextType, true, nullptr); + if (OB_FAIL(blob_res.calc_buffer_len(size))) { + LOG_WARN("calculate data size failed", K(ret)); + } else { + size = blob_res.get_buff_len(); + } + return ret; +} + +int ObArrayExprUtils::construct_array_obj(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t subschema_id, ObIArrayType *&res, bool read_only) +{ + int ret = OB_SUCCESS; + ObSubSchemaValue meta; + if (OB_FAIL(ctx.exec_ctx_.get_sqludt_meta_by_subschema_id(subschema_id, meta))) { + LOG_WARN("failed to get subschema meta", K(ret), K(subschema_id)); + } else { + const ObSqlCollectionInfo *src_coll_info = reinterpret_cast(meta.value_); + ObCollectionArrayType *arr_type = static_cast(src_coll_info->collection_meta_); + if (OB_FAIL(ObArrayTypeObjFactory::construct(alloc, *arr_type, res, read_only))) { + LOG_WARN("construct array obj failed", K(ret), K(src_coll_info)); + } + } + return ret; +} + +int ObArrayExprUtils::get_array_obj(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t subschema_id, const ObString &raw_data, ObIArrayType *&res) +{ + int ret = OB_SUCCESS; + ObString data_str = raw_data; + if (res == NULL && OB_FAIL(construct_array_obj(alloc, ctx, subschema_id, res))) { + LOG_WARN("construct array obj failed", K(ret)); + } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(&alloc, + ObLongTextType, + 
CS_TYPE_BINARY, + true, + data_str))) { + LOG_WARN("fail to get real data.", K(ret), K(data_str)); + } else if (OB_FAIL(res->init(data_str))) { + LOG_WARN("failed to init array", K(ret)); + } + return ret; +} + +int ObArrayExprUtils::dispatch_array_attrs_rows(ObEvalCtx &ctx, ObIArrayType *arr_obj, const int64_t row_idx, + ObExpr **attrs, uint32_t attr_count, bool is_shallow) +{ + int ret = OB_SUCCESS; + ObArrayAttr arr_attrs[attr_count]; + uint32_t attr_idx = 0; + if (OB_FAIL(arr_obj->flatten(arr_attrs, attr_count, attr_idx))) { + LOG_WARN("array flatten failed", K(ret)); + } else { + for (uint32_t i = 0; i < attr_count; i++) { + ObIVector *vec = attrs[i]->get_vector(ctx); + if (i == 0) { + vec->set_int(row_idx, arr_obj->size()); + } else if (arr_attrs[i - 1].ptr_ == NULL && arr_attrs[i - 1].length_ == 0) { + vec->set_payload_shallow(row_idx, NULL, 0); // get ride of random values + vec->set_null(row_idx); + } else { + const char *payload = arr_attrs[i - 1].ptr_; + uint32_t len = arr_attrs[i - 1].length_; + (is_shallow || payload == NULL) ? 
vec->set_payload_shallow(row_idx, payload, len) : vec->set_payload(row_idx, payload, len); + } + } + } + return ret; +}
+
+// Load column col_idx of `size` stored rows into expr's vector and attribute vectors.
+// A NULL cell marks the target row NULL (and its attributes, via set_expr_attrs_null);
+// any other cell is decoded into an array object and scattered with
+// dispatch_array_attrs_rows().
+//
+// @param expr         array-typed expression to fill
+// @param ctx          evaluation context; also supplies the temporary allocator
+// @param row_meta     layout of the stored rows
+// @param stored_rows  `size` source rows
+// @param size         number of rows to load
+// @param col_idx      column to read from each stored row
+// @param selector     optional i -> target row mapping; when NULL row i goes to row i
+// @return OB_SUCCESS or the first error returned by a callee.
+int ObArrayExprUtils::nested_expr_from_rows(const ObExpr &expr, ObEvalCtx &ctx, const sql::RowMeta &row_meta, const sql::ObCompactRow **stored_rows,
+                                            const int64_t size, const int64_t col_idx, const int64_t *selector)
+{
+  int ret = OB_SUCCESS;
+  ObIVector *vec = expr.get_vector(ctx);
+  VectorFormat format = vec->get_format();
+  ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx);
+  // to fix : outrow lob can't use tmp allocator
+  common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator();
+  ObIArrayType *arr_obj = NULL;
+  const uint16_t subschema_id = expr.obj_meta_.get_subschema_id();
+
+  if (OB_FAIL(construct_array_obj(tmp_allocator, ctx, subschema_id, arr_obj))) {
+    LOG_WARN("construct array obj failed", K(ret));
+  }
+  // the OB_SUCC(ret) guard also keeps the loop from running when construction failed
+  for (int64_t i = 0; i < size && OB_SUCC(ret); i++) {
+    int64_t row_idx = i;
+    if (selector != nullptr) {
+      row_idx = selector[i];
+    }
+    if (stored_rows[i]->is_null(col_idx)) {
+      vec->set_null(row_idx);
+      set_expr_attrs_null(expr, ctx, row_idx);
+    } else {
+      const char *payload = NULL;
+      ObLength len = 0;
+      stored_rows[i]->get_cell_payload(row_meta, col_idx, payload, len);
+      ObLobCommon *lob_comm = (ObLobCommon*)(payload);
+      ObString array_data(len, payload);
+      if (lob_comm->is_valid()) {
+        if (OB_FAIL(ObTextStringHelper::read_real_string_data(&tmp_allocator,
+                                                              ObLongTextType,
+                                                              CS_TYPE_BINARY,
+                                                              true,
+                                                              array_data))) {
+          LOG_WARN("fail to get real data.", K(ret), K(array_data));
+        }
+      }
+      if (OB_FAIL(ret)) {
+      } else if (OB_FAIL(arr_obj->init(array_data))) {
+        LOG_WARN("failed to init array", K(ret));
+      } else if (OB_FAIL(dispatch_array_attrs_rows(ctx, arr_obj, row_idx, expr.attrs_, expr.attrs_cnt_))) {
+        LOG_WARN("failed to dispatch array attrs rows", K(ret));
+      }
+    }
+  }
+  return ret;
+}
+
+// Write the array value of `size` selected rows into column col_idx of stored_rows.
+// Stored row i takes its data from vector row selector[i]: a NULL row sets the cell to
+// NULL, otherwise the payload of every attribute vector is appended to the cell in
+// attribute order.
+//
+// @param expr         array-typed expression to read
+// @param ctx          evaluation context used to fetch the vectors
+// @param row_meta     layout of the stored rows
+// @param stored_rows  `size` destination rows
+// @param selector     i -> source row mapping; indexed for every i in [0, size)
+// @param size         number of rows to write
+// @param col_idx      destination column in each stored row
+// @return OB_SUCCESS (no callee used here reports an error).
+int ObArrayExprUtils::nested_expr_to_rows(const ObExpr &expr, ObEvalCtx &ctx, const sql::RowMeta &row_meta, sql::ObCompactRow **stored_rows,
+                                          const uint16_t selector[], const int64_t size, const int64_t col_idx)
+{
+  int ret = OB_SUCCESS;
+  ObIVector *vec = expr.get_vector(ctx);
+  for (int64_t i = 0; i < size; i++) {
+    const int64_t row_idx = selector[i];
+    if (vec->is_null(row_idx)) {
+      stored_rows[i]->set_null(row_meta, col_idx);
+    } else {
+      int64_t pos = 0;
+      // a separate index for the attributes: stored_rows must stay indexed by the row
+      // counter i, not by the attribute number
+      for (uint32_t attr_idx = 0; attr_idx < expr.attrs_cnt_ && OB_SUCC(ret); ++attr_idx) {
+        const char *payload = NULL;
+        ObLength payload_len = 0;
+        ObIVector *attr_vec = expr.attrs_[attr_idx]->get_vector(ctx);
+        attr_vec->get_payload(row_idx, payload, payload_len);
+        stored_rows[i]->append_cell_payload(row_meta, col_idx, payload, payload_len, pos);
+      }
+    }
+  }
+  return ret;
+}
+
+int ObArrayExprUtils::nested_expr_to_row(const ObExpr &expr, ObEvalCtx &ctx, char *row_buf, + const int64_t col_offset, const uint64_t row_idx, int64_t &cell_len, + const int64_t *remain_size) +{ + int ret = OB_SUCCESS; + uint32_t data_len = 0; + const uint16_t subschema_id = expr.obj_meta_.get_subschema_id(); + ObSubSchemaValue meta; + bool is_vector = false; + if (OB_FAIL(ctx.exec_ctx_.get_sqludt_meta_by_subschema_id(subschema_id, meta))) { + LOG_WARN("failed to get subschema meta", K(ret), K(subschema_id)); + } else { + const ObSqlCollectionInfo *src_coll_info = reinterpret_cast(meta.value_); + ObCollectionArrayType *arr_type = static_cast(src_coll_info->collection_meta_); + is_vector = arr_type->type_id_ == ObNestedType::OB_VECTOR_TYPE; + if (is_vector) { + ObIVector *vec = expr.attrs_[2]->get_vector(ctx); + data_len += vec->get_length(row_idx); + } else { + for (uint32_t i = 0; i < expr.attrs_cnt_ && OB_SUCC(ret); ++i) { + if (i == 0) { + data_len += sizeof(uint32_t); + } else { + ObIVector *vec = expr.attrs_[i]->get_vector(ctx); + data_len += vec->get_length(row_idx); + } + } + } + ObString res_buf; + ObTextStringResult blob_res(ObLongTextType, true, nullptr); + if
(OB_FAIL(blob_res.calc_buffer_len(data_len))) { + LOG_WARN("calc buffer len failed", K(ret), K(data_len)); + } else if (remain_size != NULL && blob_res.get_buff_len() > *remain_size) { + ret = OB_BUF_NOT_ENOUGH; + LOG_WARN("row memory isn't enough", K(ret), K(blob_res.get_buff_len()), K(*remain_size)); + } else if (FALSE_IT(res_buf.assign_ptr(row_buf + col_offset, blob_res.get_buff_len()))) { + } else if (OB_FAIL(blob_res.init(data_len, res_buf))) { + LOG_WARN("text string init failed", K(ret), K(data_len)); + } else if (is_vector) { + const char *payload = NULL; + ObLength payload_len = 0; + ObIVector *vec = expr.attrs_[2]->get_vector(ctx); + vec->get_payload(row_idx, payload, payload_len); + if (OB_FAIL(blob_res.append(payload, payload_len))) { + LOG_WARN("failed to append realdata", K(ret), K(payload_len)); + } + } else { + for (uint32_t i = 0; i < expr.attrs_cnt_ && OB_SUCC(ret); ++i) { + const char *payload = NULL; + ObLength payload_len = 0; + ObIVector *vec = expr.attrs_[i]->get_vector(ctx); + uint32_t len = 0; + if (i == 0) { + len = vec->get_uint32(row_idx); + payload = reinterpret_cast(&len); + payload_len = sizeof(uint32_t); + } else { + vec->get_payload(row_idx, payload, payload_len); + } + if (OB_FAIL(blob_res.append(payload, payload_len))) { + LOG_WARN("failed to append realdata", K(ret), K(payload_len)); + } + } + } + if (OB_SUCC(ret)) { + cell_len = blob_res.get_buff_len(); + } + } + return ret; +} + +void ObArrayExprUtils::set_expr_attrs_null(const ObExpr &expr, ObEvalCtx &ctx, const int64_t idx) +{ + ObIVector *vec = expr.get_vector(ctx); + VectorFormat format = vec->get_format(); + if (is_uniform_format(format)) { + // do nothing + } else { + for (uint32_t i = 0; i < expr.attrs_cnt_; ++i) { + ObIVector *vec = expr.attrs_[i]->get_vector(ctx); + vec->set_null(idx); + } + } +} + +int ObNestedVectorFunc::construct_attr_param(ObIAllocator &alloc, ObEvalCtx &ctx, ObExpr ¶m_expr, + const uint16_t meta_id, int64_t row_idx, ObIArrayType *¶m_obj) +{ + int 
ret = OB_SUCCESS; + if (param_obj == NULL && OB_FAIL(ObArrayExprUtils::construct_array_obj(alloc, ctx, meta_id, param_obj))) { + LOG_WARN("construct array obj failed", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::assemble_array_attrs(ctx, param_expr, row_idx, param_obj))) { + LOG_WARN("assemble array attrs failed", K(ret)); + } + return ret; +} + +int ObNestedVectorFunc::construct_param( + ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t meta_id, ObString &str_data, ObIArrayType *¶m_obj) +{ + return ObArrayExprUtils::get_array_obj(alloc, ctx, meta_id, str_data, param_obj); +} + +int ObNestedVectorFunc::construct_res_obj( + ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t meta_id, ObIArrayType *&res_obj) +{ + return ObArrayExprUtils::construct_array_obj(alloc, ctx, meta_id, res_obj, false); +} + +int ObNestedVectorFunc::construct_params(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t left_meta_id, + const uint16_t right_meta_id, const uint16_t res_meta_id, ObString &left, ObString right, ObIArrayType *&left_obj, + ObIArrayType *&right_obj, ObIArrayType *&res_obj) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObArrayExprUtils::get_array_obj(alloc, ctx, left_meta_id, left, left_obj))) { + SQL_ENG_LOG(WARN, "get array failed", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::get_array_obj(alloc, ctx, right_meta_id, right, right_obj))) { + SQL_ENG_LOG(WARN, "get array failed", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::construct_array_obj(alloc, ctx, res_meta_id, res_obj, false))) { + SQL_ENG_LOG(WARN, "construct res array failed", K(ret)); + } + return ret; +} + +} // sql +} // oceanbase diff --git a/src/sql/engine/expr/ob_array_expr_utils.h b/src/sql/engine/expr/ob_array_expr_utils.h new file mode 100644 index 0000000000..d8467cbf7b --- /dev/null +++ b/src/sql/engine/expr/ob_array_expr_utils.h @@ -0,0 +1,142 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains implementation for ob_array_expr_utils. + */ + +#ifndef OCEANBASE_SQL_OB_ARRAY_EXPR_UTILS_H_ +#define OCEANBASE_SQL_OB_ARRAY_EXPR_UTILS_H_ + +#include "lib/allocator/ob_allocator.h" +#include "lib/string/ob_string.h" +#include "lib/udt/ob_array_type.h" +#include "sql/engine/expr/ob_expr.h" // for ObExpr +#include "sql/session/ob_sql_session_info.h" +#include "sql/engine/ob_exec_context.h" +#include "sql/engine/expr/ob_expr_lob_utils.h" + +namespace oceanbase +{ +namespace sql +{ + +struct ObVectorCastInfo +{ + ObVectorCastInfo() + : is_vector_(false), + need_cast_(false), + subschema_id_(UINT16_MAX), + dim_cnt_(0) + {} + bool is_vector_; + bool need_cast_; + uint16_t subschema_id_; + uint16_t dim_cnt_; +}; + +class ObArrayExprUtils +{ +public: + ObArrayExprUtils(); + virtual ~ObArrayExprUtils() = default; + static int set_array_res(ObIArrayType *arr_obj, const int32_t data_len, const ObExpr &expr, ObEvalCtx &ctx, common::ObString &res, + const char *data = nullptr); + static int set_array_res(ObIArrayType *arr_obj, const int32_t data_len, ObIAllocator &allocator, common::ObString &res, + const char *data = nullptr); + static int set_array_obj_res(ObIArrayType *arr_obj, ObObjCastParams *params, ObObj *obj); + template + static int set_array_res(ObIArrayType *arr_obj, const ObExpr &expr, ObEvalCtx &ctx, + ResVec *res_vec, int64_t batch_idx); + static int deduce_array_element_type(ObExecContext *exec_ctx, ObExprResType* types_stack, int64_t param_num, ObDataType &elem_type); + static int 
deduce_nested_array_subschema_id(ObExecContext *exec_ctx, ObDataType &elem_type, uint16_t &subschema_id); + static int check_array_type_compatibility(ObExecContext *exec_ctx, uint16_t l_subid, uint16_t r_subid, bool &is_compatiable); + static int get_array_element_type(ObExecContext *exec_ctx, uint16_t subid, ObObjType &obj_type, uint32_t &depth, bool &is_vec); + static int get_array_element_type(ObExecContext *exec_ctx, uint16_t subid, ObDataType &elem_type, uint32_t &depth, bool &is_vec); + static int dispatch_array_attrs(ObEvalCtx &ctx, ObExpr &expr, ObString &array_data, const int64_t row_idx); + static int dispatch_array_attrs(ObEvalCtx &ctx, ObIArrayType *arr_obj, ObExpr **attrs, uint32_t attr_count, const int64_t row_idx); + static int batch_dispatch_array_attrs(ObEvalCtx &ctx, ObExpr &expr, int64_t begin, int64_t batch_size, const uint16_t *selector = NULL); + static int transform_array_to_uniform(ObEvalCtx &ctx, const ObExpr &expr, const int64_t batch_size, const ObBitVector *skip); + static int construct_array_obj(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t subschema_id, ObIArrayType *&res, bool read_only = true); + static int calc_nested_expr_data_size(const ObExpr &expr, ObEvalCtx &ctx, const int64_t batch_idx, int64_t &size); + static int get_array_obj(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t subschema_id, const ObString &raw_data, ObIArrayType *&res); + static int dispatch_array_attrs_rows(ObEvalCtx &ctx, ObIArrayType *arr_obj, const int64_t row_idx, + ObExpr **attrs, uint32_t attr_count, bool is_shallow = true); + static int nested_expr_from_rows(const ObExpr &expr, ObEvalCtx &ctx, const sql::RowMeta &row_meta, const sql::ObCompactRow **stored_rows, + const int64_t size, const int64_t col_idx, const int64_t *selector = NULL); + static int nested_expr_to_rows(const ObExpr &expr, ObEvalCtx &ctx, const sql::RowMeta &row_meta, sql::ObCompactRow **stored_rows, + const uint16_t selector[], const int64_t size, const int64_t col_idx); + 
static int nested_expr_to_row(const ObExpr &expr, ObEvalCtx &ctx, char *row_buf, + const int64_t col_offset, const uint64_t row_idx, int64_t &cell_len, const int64_t *remain_size = nullptr); + static int assemble_array_attrs(ObEvalCtx &ctx, const ObExpr &expr, int64_t row_idx, ObIArrayType *arr_obj); + static void set_expr_attrs_null(const ObExpr &expr, ObEvalCtx &ctx, const int64_t idx); + + // for vector + static int get_type_vector(const ObExpr &expr, + ObEvalCtx &ctx, + ObIAllocator &allocator, + ObIArrayType *&result, + bool &is_null); + static int get_type_vector(const ObExpr &expr, + const ObDatum &datum, + ObEvalCtx &ctx, + ObIAllocator &allocator, + ObIArrayType *&result); + static int calc_cast_type(ObExprResType &type, common::ObExprTypeCtx &type_ctx, const bool only_vector = false); + static int calc_cast_type2(ObExprResType &type1, ObExprResType &type2, common::ObExprTypeCtx &type_ctx, uint16_t &res_subschema_id, + const bool only_vector = false); + static int collect_vector_cast_info(ObExprResType &type, ObExecContext &exec_ctx, ObVectorCastInfo &info); + + // update inplace + static int vector_datum_add(ObDatum &res, const ObDatum &data, ObIAllocator &allocator, bool negative = false); +private: + static const char* DEFAULT_CAST_TYPE_NAME; + static const ObString DEFAULT_CAST_TYPE_STR; +}; + +struct ObVectorArithFunc +{ + enum ArithType + { + ADD = 0, + MINUS, + MUL, + DIV, + }; +}; + +struct ObVectorVectorArithFunc : public ObVectorArithFunc +{ + + int operator()(ObDatum &res, const ObDatum &l, const ObDatum &r, const ObExpr &expr, ObEvalCtx &ctx, ArithType type) const; +}; + +struct ObVectorFloatArithFunc : public ObVectorArithFunc +{ + int operator()(ObDatum &res, const ObDatum &l, const ObDatum &r, const ObExpr &expr, ObEvalCtx &ctx, ArithType type) const; +}; + +class ObNestedVectorFunc +{ +public: + static int construct_attr_param(ObIAllocator &alloc, ObEvalCtx &ctx, ObExpr ¶m_expr, + const uint16_t meta_id, int64_t row_idx, ObIArrayType 
*¶m_obj); + static int construct_param(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t meta_id, + ObString &str_data, ObIArrayType *¶m_obj); + static int construct_res_obj(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t meta_id, ObIArrayType *&res_obj); + + static int construct_params(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t left_meta_id, + const uint16_t right_meta_id, const uint16_t res_meta_id, ObString &left, ObString right, + ObIArrayType *&left_obj, ObIArrayType *&right_obj, ObIArrayType *&res_obj); +}; + + +} // sql +} // oceanbase +#endif // OCEANBASE_SQL_OB_ARRAY_EXPR_UTILS_H_ \ No newline at end of file diff --git a/src/sql/engine/expr/ob_batch_eval_util.cpp b/src/sql/engine/expr/ob_batch_eval_util.cpp index 7a6cf7ccfc..3a5f05f896 100644 --- a/src/sql/engine/expr/ob_batch_eval_util.cpp +++ b/src/sql/engine/expr/ob_batch_eval_util.cpp @@ -144,5 +144,64 @@ int binary_operand_vector_eval(const ObExpr &expr, return ret; } +int ObNestedArithOpBaseFunc::construct_attr_param(ObIAllocator &alloc, ObEvalCtx &ctx, ObExpr ¶m_expr, + const uint16_t meta_id, int64_t row_idx, ObIArrayType *¶m_obj) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObArrayExprUtils::construct_array_obj(alloc, ctx, meta_id, param_obj))) { + LOG_WARN("construct array obj failed", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::assemble_array_attrs(ctx, param_expr, row_idx, param_obj))) { + LOG_WARN("assemble array attrs failed", K(ret)); + } + return ret; +} + +int ObNestedArithOpBaseFunc::construct_param(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t meta_id, + ObString &str_data, ObIArrayType *¶m_obj) +{ + return ObArrayExprUtils::get_array_obj(alloc, ctx, meta_id, str_data, param_obj); +} + +int ObNestedArithOpBaseFunc::construct_res_obj(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t meta_id, ObIArrayType *&res_obj) +{ + return ObArrayExprUtils::construct_array_obj(alloc, ctx, meta_id, res_obj, false); +} + +int ObNestedArithOpBaseFunc::construct_params(ObIAllocator 
&alloc, ObEvalCtx &ctx, const uint16_t left_meta_id, + const uint16_t right_meta_id, const uint16_t res_meta_id, ObString &left, ObString right, + ObIArrayType *&left_obj, ObIArrayType *&right_obj, ObIArrayType *&res_obj) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObArrayExprUtils::get_array_obj(alloc, ctx, left_meta_id, left, left_obj))) { + SQL_ENG_LOG(WARN, "get array failed", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::get_array_obj(alloc, ctx, right_meta_id, right, right_obj))) { + SQL_ENG_LOG(WARN, "get array failed", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::construct_array_obj(alloc, ctx, res_meta_id, res_obj, false))) { + SQL_ENG_LOG(WARN, "construct res array failed", K(ret)); + } + return ret; +} + +int ObNestedArithOpBaseFunc::get_res(ObEvalCtx &ctx, ObIArrayType *res_obj, const ObExpr &expr, ObString &res_str) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(res_obj->init())) { + LOG_WARN("array init failed", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::set_array_res(res_obj, res_obj->get_raw_binary_len(), expr, ctx, res_str))) { + LOG_WARN("set array result failed", K(ret)); + } + return ret; +} + +int ObNestedArithOpBaseFunc::distribute_expr_attrs(const ObExpr &expr, ObEvalCtx &ctx, const int64_t idx, ObIArrayType &res_obj) +{ + return ObArrayExprUtils::dispatch_array_attrs_rows(ctx, &res_obj, idx, expr.attrs_, expr.attrs_cnt_, false); +} + +void ObNestedArithOpBaseFunc::set_expr_attrs_null(const ObExpr &expr, ObEvalCtx &ctx, const int64_t idx) +{ + ObArrayExprUtils::set_expr_attrs_null(expr, ctx, idx); +} + } // end namespace sql } // end namespace oceanbase diff --git a/src/sql/engine/expr/ob_batch_eval_util.h b/src/sql/engine/expr/ob_batch_eval_util.h index 483099bb8f..bb83dce9f1 100644 --- a/src/sql/engine/expr/ob_batch_eval_util.h +++ b/src/sql/engine/expr/ob_batch_eval_util.h @@ -19,6 +19,8 @@ #include "share/vector/ob_uniform_base.h" #include "share/vector/ob_discrete_base.h" #include "ob_expr_add.h" +#include 
"sql/engine/expr/ob_array_expr_utils.h" + namespace oceanbase { namespace sql @@ -667,6 +669,59 @@ int def_variable_len_vector_arith_op(VECTOR_EVAL_FUNC_ARG_DECL, Args &... args) } return ret; } + +template +int def_nested_vector_arith_op(VECTOR_EVAL_FUNC_ARG_DECL, Args &... args) +{ + int ret = OB_SUCCESS; + EvalBound pvt_bound = bound; + bool right_evaluated = true; + if (OB_FAIL(binary_operand_vector_eval(expr, ctx, skip, pvt_bound, lib::is_oracle_mode(), + right_evaluated))) { + SQL_LOG(WARN, "binary operand vector evaluate failed", K(ret), K(expr)); + } else { + const ObExpr &left = *expr.args_[0]; + const ObExpr &right = *expr.args_[1]; + const VectorFormat left_format = left.get_format(ctx); + const VectorFormat right_format = right.get_format(ctx); + if (!is_uniform_format(left_format) && !is_uniform_format(right_format) + && (left.attrs_cnt_ == 0 || left.attrs_cnt_ != right.attrs_cnt_)) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + SQL_LOG(WARN, "nested type is mismatch", K(ret), K(expr)); + } else { + const VectorFormat res_format = expr.get_format(ctx); + const int64_t cond = GET_FORMAT_CONDITION(VEC_DISCRETE, res_format, left_format, right_format); + switch (cond) + { + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Uniform, Uniform, Uniform, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Uniform, Const, Uniform, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Uniform, Uniform, Const, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Uniform, Discrete, Discrete, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Uniform, Const, Discrete, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Uniform, Discrete, Const, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Uniform, Uniform, Discrete, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Const, Const, Const, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Discrete, Uniform, Uniform, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Discrete, Discrete, Discrete, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Discrete, 
Discrete, Uniform, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Discrete, Discrete, Const, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Discrete, Uniform, Discrete, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Discrete, Uniform, Const, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Discrete, Const, Uniform, ArithOp); + FORMAT_DISPATCH_BRANCH(VEC_DISCRETE, Discrete, Const, Discrete, ArithOp); + default: + ret = OB_NOT_SUPPORTED; + SQL_LOG(WARN, "not supported format", K(ret), K(left_format), K(right_format), K(res_format)); + } + } + if (OB_SUCC(ret)) { + SQL_LOG(DEBUG, "expr", K(ToStrVectorHeader(expr, ctx, &skip, pvt_bound))); + SQL_LOG(DEBUG, "expr.args_[0]", K(ToStrVectorHeader(*expr.args_[0], ctx, &skip, pvt_bound))); + SQL_LOG(DEBUG, "expr.args_[1]", K(ToStrVectorHeader(*expr.args_[1], ctx, &skip, pvt_bound))); + } + } + return ret; +} #undef FORMAT_DISPATCH_BRANCH template @@ -769,6 +824,37 @@ int def_variable_len_vector_arith_op_func(VECTOR_EVAL_FUNC_ARG_DECL, Args &...ar VECTOR_EVAL_FUNC_ARG_LIST, args...); } +template +struct ObWrapNestedVectorArithOpNullCheck +{ + template + static int vector_op(ResVector &res_vec, const LeftVector &left_vec, const RightVector &right_vec, + const int64_t idx, const ObExpr &expr, ObEvalCtx &ctx) + { + return VectorFunctor()(res_vec, left_vec, right_vec, idx, expr, ctx); + } + + template + static int null_check_vector_op(ResVector &res_vec, const LeftVector &left_vec, const RightVector &right_vec, + const int64_t idx, const ObExpr &expr, ObEvalCtx &ctx) + { + int ret = OB_SUCCESS; + if (left_vec.is_null(idx) || right_vec.is_null(idx)) { + res_vec.set_null(idx); + VectorFunctor::set_attrs_null(expr, ctx, idx); + } else { + ret = VectorFunctor()(res_vec, left_vec, right_vec, idx, expr, ctx); + } + return ret; + } +}; + +template +int def_nested_vector_arith_op_func(VECTOR_EVAL_FUNC_ARG_DECL, Args &...args) +{ + return def_nested_vector_arith_op, Args...>( + VECTOR_EVAL_FUNC_ARG_LIST, args...); +} // Wrap arith 
datum operate from raw operate. template @@ -847,6 +933,121 @@ struct ObVectorArithOpWrap : public Base } }; +template +struct ObNestedVectorArithOpFunc : public Base +{ + template + int operator()(ResVector &res_vec, const LeftVector &l_vec, const RightVector &r_vec, + const int64_t idx, const ObExpr &expr, ObEvalCtx &ctx) const + { + int ret = OB_SUCCESS; + + ObString left = l_vec.get_string(idx); + ObString right = r_vec.get_string(idx); + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); + const uint16_t left_meta_id = expr.args_[0]->obj_meta_.get_subschema_id(); + const uint16_t right_meta_id = expr.args_[1]->obj_meta_.get_subschema_id(); + const uint16_t res_meta_id = expr.obj_meta_.get_subschema_id(); + ObIArrayType *left_obj = NULL; + ObIArrayType *right_obj = NULL; + ObIArrayType *res_obj = NULL; + ObString res_str; + if (l_vec.get_format() == VEC_UNIFORM || l_vec.get_format() == VEC_UNIFORM_CONST) { + ret = Base::construct_param(tmp_allocator, ctx, left_meta_id, left, left_obj); + } else { + ret = Base::construct_attr_param(tmp_allocator, ctx, *expr.args_[0], left_meta_id, idx, left_obj); + } + if (OB_FAIL(ret)) { + } else if (r_vec.get_format() == VEC_UNIFORM || r_vec.get_format() == VEC_UNIFORM_CONST) { + ret = Base::construct_param(tmp_allocator, ctx, right_meta_id, right, right_obj); + } else { + ret = Base::construct_attr_param(tmp_allocator, ctx, *expr.args_[1], right_meta_id, idx, right_obj); + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(Base::construct_res_obj(tmp_allocator, ctx, res_meta_id, res_obj))) { + SQL_ENG_LOG(WARN, "get array failed", K(ret)); + } else if (OB_FAIL(Base()(*res_obj, *left_obj, *right_obj))) { + SQL_ENG_LOG(WARN, "exec calculate func failed", K(ret)); + } else if (OB_FAIL(res_obj->init())) { + SQL_ENG_LOG(WARN, "init nested obj failed", K(ret)); + } else if (std::is_same>::value || std::is_same>::value) { + if (OB_FAIL(Base::get_res_batch(ctx, res_obj, 
expr, idx, &res_vec))) { + SQL_ENG_LOG(WARN, "get array binary string failed", K(ret)); + } + } else if (std::is_same::value) { + if (OB_FAIL(Base::distribute_expr_attrs(expr, ctx, idx, *res_obj))) { + SQL_ENG_LOG(WARN, "get array binary string failed", K(ret)); + } + } + return ret; + } + + static void set_attrs_null(const ObExpr &expr, ObEvalCtx &ctx, const int64_t idx) + { + Base::set_expr_attrs_null(expr, ctx, idx); + } +}; + +template +struct ObNestedArithOpWrap : public Base +{ + int operator()(ObDatum &res, const ObDatum &l, const ObDatum &r, const ObExpr &expr, ObEvalCtx &ctx) const + { + int ret = OB_SUCCESS; + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); + ObString left = l.get_string(); + ObString right = r.get_string(); + const uint16_t left_meta_id = expr.args_[0]->obj_meta_.get_subschema_id(); + const uint16_t right_meta_id = expr.args_[1]->obj_meta_.get_subschema_id(); + const uint16_t res_meta_id = expr.obj_meta_.get_subschema_id(); + ObIArrayType *left_obj = NULL; + ObIArrayType *right_obj = NULL; + ObIArrayType *res_obj = NULL; + ObString res_str; + if (OB_FAIL(Base::construct_params(tmp_allocator, ctx, left_meta_id, right_meta_id, res_meta_id, + left, right, left_obj, right_obj, res_obj))) { + SQL_ENG_LOG(WARN, "get array failed", K(ret)); + } else if (OB_FAIL(Base()(*res_obj, *left_obj, *right_obj))) { + SQL_ENG_LOG(WARN, "exec calculate func failed", K(ret)); + } else if (OB_FAIL(Base::get_res(ctx, res_obj, expr, res_str))) { + SQL_ENG_LOG(WARN, "get array binary string failed", K(ret)); + } else { + res.set_string(res_str); + } + + return ret; + } +}; + +struct ObNestedArithOpBaseFunc +{ + static int construct_attr_param(ObIAllocator &alloc, ObEvalCtx &ctx, ObExpr ¶m_expr, + const uint16_t meta_id, int64_t row_idx, ObIArrayType *¶m_obj); + static int construct_param(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t meta_id, + ObString &str_data, ObIArrayType *¶m_obj); + 
static int construct_res_obj(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t meta_id, ObIArrayType *&res_obj); + static int construct_params(ObIAllocator &alloc, ObEvalCtx &ctx, const uint16_t left_meta_id, + const uint16_t right_meta_id, const uint16_t res_meta_id, ObString &left, ObString right, + ObIArrayType *&left_obj, ObIArrayType *&right_obj, ObIArrayType *&res_obj); + static int get_res(ObEvalCtx &ctx, ObIArrayType *res_obj, const ObExpr &expr, ObString &res_str); + template + static int get_res_batch(ObEvalCtx &ctx, ObIArrayType *res_obj, const ObExpr &expr, const int64_t row_idx, ResVec *res_vec) + { + int ret = OB_SUCCESS; + if (OB_FAIL(res_obj->init())) { + LOG_WARN("array init failed", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::set_array_res(res_obj, expr, ctx, res_vec, row_idx))) { + LOG_WARN("set array res failed", K(ret)); + } + return ret; + } + + static int distribute_expr_attrs(const ObExpr &expr, ObEvalCtx &ctx, const int64_t idx, ObIArrayType &res_obj); + static void set_expr_attrs_null(const ObExpr &expr, ObEvalCtx &ctx, const int64_t idx); +}; + } // end namespace sql } // end namespace oceanbase diff --git a/src/sql/engine/expr/ob_datum_cast.cpp b/src/sql/engine/expr/ob_datum_cast.cpp index b0a921276e..49b6a46180 100644 --- a/src/sql/engine/expr/ob_datum_cast.cpp +++ b/src/sql/engine/expr/ob_datum_cast.cpp @@ -26,6 +26,7 @@ #include "lib/json_type/ob_json_bin.h" #include "lib/json_type/ob_json_base.h" #include "lib/json_type/ob_json_parse.h" +#include "sql/engine/expr/ob_array_cast.h" #include "lib/roaringbitmap/ob_rb_utils.h" #include "share/ob_lob_access_utils.h" #include "sql/engine/expr/ob_expr_lob_utils.h" @@ -34,6 +35,7 @@ #include "sql/engine/expr/ob_geo_expr_utils.h" #include "lib/udt/ob_udt_type.h" #include "sql/engine/expr/ob_expr_sql_udt_utils.h" +#include "sql/engine/expr/ob_array_expr_utils.h" #include "lib/xml/ob_xml_util.h" #include "sql/engine/expr/ob_expr_xml_func_helper.h" #include "pl/ob_pl.h" @@ -9578,6 +9580,123 
@@ CAST_FUNC_NAME(udt, udt) return ret; } +CAST_FUNC_NAME(collection, collection) +{ + EVAL_STRING_ARG() + { + const ObObjMeta &in_obj_meta = expr.args_[0]->obj_meta_; + const uint16_t src_subschema_id = in_obj_meta.get_subschema_id(); + const uint16_t dst_subschema_id = expr.datum_meta_.get_subschema_id(); + ObSubSchemaValue src_meta; + ObSubSchemaValue dst_meta; + + if (OB_FAIL(ctx.exec_ctx_.get_sqludt_meta_by_subschema_id(src_subschema_id, src_meta))) { + LOG_WARN("failed to get subschema meta", K(ret), K(src_subschema_id)); + } else if (OB_FAIL(ctx.exec_ctx_.get_sqludt_meta_by_subschema_id(dst_subschema_id, dst_meta))) { + LOG_WARN("failed to get subschema meta", K(ret), K(dst_subschema_id)); + } else if (src_meta.type_ != ObSubSchemaType::OB_SUBSCHEMA_COLLECTION_TYPE + || src_meta.type_ != dst_meta.type_) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("invalid subschema type", K(ret), K(src_meta.type_), K(dst_meta.type_)); + } else if (src_subschema_id == dst_subschema_id) { + ObExprStrResAlloc expr_res_alloc(expr, ctx); + if (OB_FAIL(res_datum.deep_copy(*child_res, expr_res_alloc))) { + LOG_WARN("Failed to deep copy from res datum", K(ret)); + } + } else { + ObString blob_data = child_res->get_string(); + const ObSqlCollectionInfo *src_coll_info = reinterpret_cast(src_meta.value_); + const ObSqlCollectionInfo *dst_coll_info = reinterpret_cast(dst_meta.value_); + ObCollectionArrayType *arr_type = static_cast(src_coll_info->collection_meta_); + // to do : nested array + ObCollectionBasicType *elem_type = static_cast(arr_type->element_type_); + ObCollectionArrayType *dst_arr_type = static_cast(dst_coll_info->collection_meta_); + // to do : nested array + ObCollectionBasicType *dst_elem_type = static_cast(dst_arr_type->element_type_); + ObIArrayType *arr_src = NULL; + ObIArrayType *arr_dst = NULL; + ObArrayTypeCast *arr_cast = NULL; + ObString res_str; + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &temp_allocator = 
tmp_alloc_g.get_allocator(); + if (OB_FAIL(ObTextStringHelper::read_real_string_data(&temp_allocator, + ObLongTextType, + CS_TYPE_BINARY, + true, + blob_data))) { + LOG_WARN("fail to get real data.", K(ret), K(blob_data)); + } else if (OB_FAIL(ObArrayTypeObjFactory::construct(temp_allocator, *arr_type, arr_src, true))) { + LOG_WARN("construct array obj failed", K(ret), K(src_coll_info)); + } else if (OB_FAIL(arr_src->init(blob_data))) { + LOG_WARN("failed to init array", K(ret)); + } else if (OB_FAIL(ObArrayTypeObjFactory::construct(temp_allocator, *dst_arr_type, arr_dst))) { + LOG_WARN("construct array obj failed", K(ret), K(dst_coll_info)); + } else if (OB_FAIL(ObArrayTypeCastFactory::alloc(temp_allocator, *arr_type, + *dst_arr_type, arr_cast))) { + LOG_WARN("alloc array cast failed", K(ret), K(src_coll_info)); + } else if (OB_FAIL(arr_cast->cast(temp_allocator, arr_src, elem_type, arr_dst, dst_elem_type))) { + LOG_WARN("array element cast failed", K(ret), K(*src_coll_info), K(*dst_coll_info)); + if (ret == OB_ERR_ARRAY_TYPE_MISMATCH) { + ObString dst_def = dst_coll_info->get_def_string(); + ObString src_def = src_coll_info->get_def_string(); + LOG_USER_ERROR(OB_ERR_ARRAY_TYPE_MISMATCH, dst_def.length(), dst_def.ptr(), src_def.length(), src_def.ptr()); + } else if (ret == OB_ERR_INVALID_VECTOR_DIM) { + LOG_USER_ERROR(OB_ERR_INVALID_VECTOR_DIM, static_cast(dst_arr_type->dim_cnt_), arr_src->size()); + } + } else if (OB_FAIL(arr_dst->check_validity(*dst_arr_type, *arr_dst))) { + LOG_WARN("check array validty failed", K(ret), K(dst_coll_info)); + if (ret == OB_ERR_INVALID_VECTOR_DIM) { + LOG_USER_ERROR(OB_ERR_INVALID_VECTOR_DIM, static_cast(dst_arr_type->dim_cnt_), arr_dst->size()); + } + } else if (OB_FAIL(ObArrayExprUtils::set_array_res(arr_dst, arr_dst->get_raw_binary_len(), expr, ctx, res_str))) { + LOG_WARN("get array binary string failed", K(ret), K(src_coll_info)); + } else { + res_datum.set_string(res_str); + } + } + } + return ret; +} + 
+CAST_FUNC_NAME(string, collection) +{ + EVAL_STRING_ARG() + { + const uint16_t dst_subschema_id = expr.datum_meta_.get_subschema_id(); + ObSubSchemaValue dst_meta; + if (OB_FAIL(ctx.exec_ctx_.get_sqludt_meta_by_subschema_id(dst_subschema_id, dst_meta))) { + LOG_WARN("failed to get subschema meta", K(ret), K(dst_subschema_id)); + } else { + ObString in_str = child_res->get_string(); + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); + ObIArrayType *arr_dst = NULL; + ObString res_str; + const ObSqlCollectionInfo *dst_coll_info = reinterpret_cast(dst_meta.value_); + ObCollectionArrayType *dst_arr_type = static_cast(dst_coll_info->collection_meta_); + if (OB_FAIL(ObTextStringHelper::read_real_string_data(temp_allocator, *child_res, + expr.args_[0]->datum_meta_, expr.args_[0]->obj_meta_.has_lob_header(), in_str))) { + LOG_WARN("fail to get real data.", K(ret), K(in_str)); + } else if (OB_FAIL(ObArrayTypeObjFactory::construct(temp_allocator, *dst_arr_type, arr_dst))) { + LOG_WARN("construct array obj failed", K(ret), K(dst_coll_info)); + } else if (OB_FAIL(ObArrayCastUtils::string_cast(temp_allocator, in_str, arr_dst, dst_arr_type->element_type_))) { + LOG_WARN("array element cast failed", K(ret), K(dst_coll_info)); + } else if (OB_FAIL(arr_dst->check_validity(*dst_arr_type, *arr_dst))) { + LOG_WARN("check array validty failed", K(ret), K(dst_coll_info)); + if (ret == OB_ERR_INVALID_VECTOR_DIM) { + LOG_USER_ERROR(OB_ERR_INVALID_VECTOR_DIM, static_cast(dst_arr_type->dim_cnt_), arr_dst->size()); + } + } else if (OB_FAIL(ObArrayExprUtils::set_array_res(arr_dst, arr_dst->get_raw_binary_len(), expr, ctx, res_str))) { + LOG_WARN("get array binary string failed", K(ret), K(dst_coll_info)); + } else { + res_datum.set_string(res_str); + } + } + } + return ret; +} + + CAST_FUNC_NAME(pl_extend, string) { EVAL_STRING_ARG() @@ -14147,7 +14266,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = 
cast_eval_arg,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ cast_eval_arg,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_eval_arg,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_eval_arg,/*roaringbitmap*/ @@ -14180,7 +14299,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = int_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ int_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -14213,7 +14332,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = uint_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ uint_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -14246,7 +14365,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = float_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ float_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -14279,7 +14398,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = double_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ double_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql 
mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -14312,7 +14431,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = number_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ number_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -14345,7 +14464,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = datetime_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ datetime_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -14378,7 +14497,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = date_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ date_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -14411,7 +14530,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = time_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ time_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -14444,7 +14563,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = 
year_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ year_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -14477,7 +14596,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = string_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ string_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + string_collection,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ string_roaringbitmap,/*roaringbitmap*/ @@ -14510,7 +14629,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = cast_not_support,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ cast_not_support,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -14576,7 +14695,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = string_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ text_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + string_collection,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ string_roaringbitmap,/*roaringbitmap*/ @@ -14609,7 +14728,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = bit_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ bit_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql mode*/ 
cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -14873,7 +14992,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = json_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ json_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -14906,7 +15025,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = geometry_geometry,/*geometry*/ cast_not_expected, /*udt*/ geometry_decimalint,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ @@ -14972,43 +15091,43 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = decimalint_geometry,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ decimalint_decimalint, /*decimal int*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*collection, not implemented in mysql mode*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ cast_not_support,/*roaringbitmap*/ }, { /*collection -> XXX*/ - cast_not_expected,/*null*/ - cast_not_expected,/*int*/ - cast_not_expected,/*uint*/ - cast_not_expected,/*float*/ - cast_not_expected,/*double*/ - cast_not_expected,/*number*/ - cast_not_expected,/*datetime*/ - cast_not_expected,/*date*/ - cast_not_expected,/*time*/ - cast_not_expected,/*year*/ - cast_not_expected,/*string*/ - cast_not_expected,/*extend*/ - cast_not_expected,/*unknown*/ - cast_not_expected,/*text*/ - cast_not_expected,/*bit*/ + cast_not_support,/*null*/ + cast_not_support,/*int*/ + cast_not_support,/*uint*/ + 
cast_not_support,/*float*/ + cast_not_support,/*double*/ + cast_not_support,/*number*/ + cast_not_support,/*datetime*/ + cast_not_support,/*date*/ + cast_not_support,/*time*/ + cast_not_support,/*year*/ + cast_not_support,/*string*/ + cast_not_support,/*extend*/ + cast_not_support,/*unknown*/ + cast_not_support,/*text*/ + cast_not_support,/*bit*/ cast_not_expected,/*enumset*/ cast_not_expected,/*enumset_inner*/ - cast_not_expected,/*otimestamp*/ - cast_not_expected,/*raw*/ + cast_not_support,/*otimestamp*/ + cast_not_support,/*raw*/ cast_not_expected,/*interval*/ cast_not_expected,/*rowid*/ cast_not_expected,/*lob*/ - cast_not_expected,/*json*/ - cast_not_expected,/*geometry*/ + cast_not_support,/*json*/ + cast_not_support,/*geometry*/ cast_not_expected,/*udt, not implemented in mysql mode*/ - cast_not_expected,/*decimalint*/ - cast_not_expected,/*collection, not implemented in mysql mode*/ + cast_not_support,/*decimalint*/ + collection_collection,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ - cast_not_expected,/*roaringbitmap*/ + cast_not_support,/*roaringbitmap*/ }, { /*mysql date -> XXX*/ @@ -15104,7 +15223,7 @@ ObExpr::EvalFunc OB_DATUM_CAST_MYSQL_IMPLICIT[ObMaxTC][ObMaxTC] = cast_not_support,/*geometry*/ cast_not_expected,/*udt*/ cast_not_support,/*decimalint*/ - cast_not_expected,/*collection*/ + cast_not_support,/*collection*/ cast_not_expected,/*mysql date*/ cast_not_expected,/*mysql datetime*/ roaringbitmap_roaringbitmap,/*roaringbitmap*/ @@ -15702,6 +15821,8 @@ int ObDatumCaster::to_type(const ObDatumMeta &dst_type, ob_is_decimal_int_tc(src_type.type_) && ObDatumCast::need_scale_decimalint(src_type.scale_, src_type.precision_, dst_type.scale_, dst_type.precision_); + bool need_cast_collection = (src_type.type_ == dst_type.type_) && + ob_is_collection_sql_type(src_type.type_) && (src_type.cs_type_ != dst_type.cs_type_); if (OB_UNLIKELY(!inited_) || OB_ISNULL(eval_ctx_) || OB_ISNULL(cast_expr_) || 
OB_ISNULL(extra_cast_expr_)) { ret = OB_NOT_INIT; @@ -15711,7 +15832,7 @@ int ObDatumCaster::to_type(const ObDatumMeta &dst_type, } else if ((ob_is_string_or_lob_type(src_type.type_) && src_type.type_ == dst_type.type_ && src_cs == dst_cs) || (!ob_is_string_or_lob_type(src_type.type_) && !need_cast_decimalint - && src_type.type_ == dst_type.type_)) { + && src_type.type_ == dst_type.type_ && !need_cast_collection)) { LOG_DEBUG("no need to cast, just eval src_expr", K(ret), K(src_expr), K(dst_type)); if (OB_FAIL(src_expr.eval(*eval_ctx_, res))) { LOG_WARN("eval src_expr failed", K(ret)); } } else { @@ -15722,7 +15843,9 @@ int ObDatumCaster::to_type(const ObDatumMeta &dst_type, bool need_extra_cast_for_src_type = false; bool need_extra_cast_for_dst_type = false; - if (str_to_nonstr) { + if (need_cast_collection) { + // do nothing + } else if (str_to_nonstr) { if (CHARSET_BINARY != src_cs && ObCharset::get_default_charset() != src_cs) { need_extra_cast_for_src_type = true; } diff --git a/src/sql/engine/expr/ob_expr.cpp b/src/sql/engine/expr/ob_expr.cpp index 42bcc49b99..99d3b60c7c 100644 --- a/src/sql/engine/expr/ob_expr.cpp +++ b/src/sql/engine/expr/ob_expr.cpp @@ -24,6 +24,7 @@ #include "sql/engine/expr/ob_expr_extra_info_factory.h" #include "sql/engine/expr/ob_datum_cast.h" #include "sql/engine/expr/ob_expr_lob_utils.h" +#include "sql/engine/expr/ob_array_expr_utils.h" namespace oceanbase @@ -150,6 +151,7 @@ OB_DEF_SERIALIZE(ObExpr) vec_value_tc_, ser_eval_vector_func_); OB_UNIS_ENCODE(local_session_var_id_); + LST_DO_CODE(OB_UNIS_ENCODE, serialization::make_ser_carray(attrs_, attrs_cnt_)); } return ret; @@ -219,6 +221,7 @@ OB_DEF_DESERIALIZE(ObExpr) ser_eval_vector_func_); OB_UNIS_DECODE(local_session_var_id_); + LST_DO_CODE(OB_UNIS_DECODE, serialization::make_ser_carray(attrs_, attrs_cnt_)); return ret; } @@ -268,6 +271,7 @@ OB_DEF_SERIALIZE_SIZE(ObExpr) ser_eval_vector_func_); OB_UNIS_ADD_LEN(local_session_var_id_); + LST_DO_CODE(OB_UNIS_ADD_LEN, 
serialization::make_ser_carray(attrs_, attrs_cnt_)); return len; } @@ -307,7 +311,9 @@ ObExpr::ObExpr() cont_buf_off_(UINT32_MAX), null_bitmap_off_(UINT32_MAX), vec_value_tc_(MAX_VEC_TC), - local_session_var_id_(OB_INVALID_INDEX_INT64) + local_session_var_id_(OB_INVALID_INDEX_INT64), + attrs_(NULL), + attrs_cnt_(0) { is_called_in_sql_ = 1; } @@ -708,6 +714,7 @@ int ObExpr::eval_one_datum_of_batch(ObEvalCtx &ctx, common::ObDatum *&datum) con need_evaluate = true; to_bit_vector(frame + eval_flags_off_)->reset(ctx.get_batch_size()); reset_datums_ptr(frame, ctx.get_batch_size()); + reset_attrs_datums(frame, ctx.get_batch_size()); info->evaluated_ = true; info->cnt_ = ctx.get_batch_size(); info->point_to_frame_ = true; @@ -772,6 +779,7 @@ int ObExpr::do_eval_batch(ObEvalCtx &ctx, // FIXME bin.lb: maybe we can optimize this by ObEvalInfo::point_to_frame_ if (!info->evaluated_) { reset_datums_ptr(frame, size); + reset_attrs_datums(frame, size); info->notnull_ = false; info->point_to_frame_ = true; info->evaluated_ = true; @@ -791,7 +799,7 @@ int ObExpr::do_eval_batch(ObEvalCtx &ctx, // for shared expr, may be not use uniform format when first time eval expr // so, we should cast to uniform here if (OB_SUCC(ret)) { - ret = cast_to_uniform(size, ctx); + ret = cast_to_uniform(size, ctx, &skip); } } else { ret = (*eval_batch_func_)(*this, ctx, skip, size); @@ -819,19 +827,25 @@ int ObExpr::do_eval_batch(ObEvalCtx &ctx, // no need evaluate more, cast to uniform for use rich format, // because this expr may calc by eval_vector if (UINT32_MAX != vector_header_off_) { - ret = cast_to_uniform(size, ctx); + ret = cast_to_uniform(size, ctx, &skip); } } return ret; } -int ObExpr::cast_to_uniform(const int64_t size, ObEvalCtx &ctx) const +int ObExpr::cast_to_uniform(const int64_t size, ObEvalCtx &ctx, const ObBitVector *skip) const { int ret = OB_SUCCESS; VectorHeader &vec_header = get_vector_header(ctx); LOG_DEBUG("cast to uniform", K(this), K(*this), K(vec_header.format_), K(size), 
K(ctx), K(lbt())); if (VEC_INVALID == vec_header.format_) { // do nothing + } else if (is_nested_expr()) { + if (is_uniform_format(vec_header.format_)) { + // do nothing + } else if (OB_FAIL(nested_cast_to_uniform(size, ctx, skip))) { + LOG_WARN("nested cast to uniform failed", K(vec_header.format_), K(size), K(ctx), K(lbt())); + } } else { ObIVector *vec = reinterpret_cast(vec_header.vector_buf_); ObDatum *datums = locate_batch_datums(ctx); @@ -975,6 +989,7 @@ int ObExpr::init_vector(ObEvalCtx &ctx, CONTINUOUS_VECTOR_INIT_SWITCH(VEC_TC_JSON); CONTINUOUS_VECTOR_INIT_SWITCH(VEC_TC_GEO); CONTINUOUS_VECTOR_INIT_SWITCH(VEC_TC_UDT); + CONTINUOUS_VECTOR_INIT_SWITCH(VEC_TC_COLLECTION); CONTINUOUS_VECTOR_INIT_SWITCH(VEC_TC_ROARINGBITMAP); #undef CONTINUOUS_VECTOR_INIT_SWITCH default: @@ -1007,6 +1022,7 @@ int ObExpr::init_vector(ObEvalCtx &ctx, DISCRETE_VECTOR_INIT_SWITCH(VEC_TC_JSON); DISCRETE_VECTOR_INIT_SWITCH(VEC_TC_GEO); DISCRETE_VECTOR_INIT_SWITCH(VEC_TC_UDT); + DISCRETE_VECTOR_INIT_SWITCH(VEC_TC_COLLECTION); DISCRETE_VECTOR_INIT_SWITCH(VEC_TC_ROARINGBITMAP); #undef DISCRETE_VECTOR_INIT_SWITCH default: @@ -1057,6 +1073,7 @@ int ObExpr::init_vector(ObEvalCtx &ctx, UNIFORM_VECTOR_INIT_SWITCH(VEC_TC_JSON); UNIFORM_VECTOR_INIT_SWITCH(VEC_TC_GEO); UNIFORM_VECTOR_INIT_SWITCH(VEC_TC_UDT); + UNIFORM_VECTOR_INIT_SWITCH(VEC_TC_COLLECTION); UNIFORM_VECTOR_INIT_SWITCH(VEC_TC_ROARINGBITMAP); #undef UNIFORM_VECTOR_INIT_SWITCH default: @@ -1077,6 +1094,50 @@ int ObExpr::init_vector(ObEvalCtx &ctx, if (OB_SUCC(ret)) { ObVectorBase *vector = reinterpret_cast (vector_buf); vector->set_max_row_cnt(size); + for (uint32_t i = 0; i < attrs_cnt_ && OB_SUCC(ret); ++i) { + VectorFormat attr_format = format; + if (OB_ISNULL(attrs_[i])) { + ret = OB_ERR_UNEXPECTED; + SQL_LOG(WARN, "Unexpected null attr", K(ret), K(i)); + } else if (format == VEC_UNIFORM_CONST || format == VEC_UNIFORM) { + // do nothing + } else { + attr_format = i == 0 ? 
attrs_[i]->get_default_res_format() : format;
+        if (OB_FAIL(attrs_[i]->init_vector(ctx, attr_format, size, use_reserve_buf))) {
+          SQL_LOG(WARN, "Failed to init vector", K(ret), K(i), K(format), K(size));
+        }
+      }
+
+    }
+  }
+  return ret;
+}
+// Re-point the `size` datums found at frame + datum_off_ to consecutive res_buf_len_-byte slots starting at frame + res_buf_off_.
+void ObExpr::reset_attr_datums_ptr(char *frame, const int64_t size)
+{
+  ObDatum *datum = reinterpret_cast<ObDatum *>(frame + datum_off_);
+  ObDatum *datum_end = datum + size;
+  char *ptr = frame + res_buf_off_;
+  for (; datum < datum_end; datum += 1) {
+    if (datum->ptr_ != ptr) { // store only when the pointer is stale
+      datum->ptr_ = ptr;
+    }
+    ptr += res_buf_len_;
+  }
+}
+// Call reset_datums_ptr(frame, size) on every attribute expr; a null attrs_ array or a null entry is skipped instead of dereferenced.
+void ObExpr::reset_attrs_datums(char *frame, const int64_t size) const
+{
+  for (uint32_t idx = 0; NULL != attrs_ && idx < attrs_cnt_; ++idx) {
+    if (NULL != attrs_[idx]) { attrs_[idx]->reset_datums_ptr(frame, size); }
+  }
+}
+// Bring a nested expr's result into uniform format by delegating to ObArrayExprUtils::transform_array_to_uniform; returns its error code.
+int ObExpr::nested_cast_to_uniform(const int64_t size, ObEvalCtx &ctx, const ObBitVector *skip) const
+{
+  int ret = OB_SUCCESS;
+  if (OB_FAIL(ObArrayExprUtils::transform_array_to_uniform(ctx, *this, size, skip))) {
+    SQL_LOG(WARN, "failed to cast array to uniform", K(ret), K(size));
   }
   return ret;
 }
@@ -1124,6 +1185,7 @@ int VectorHeader::init_uniform_const_vector(VecValueTypeClass vec_value_tc,
       UNIFORM_CONST_VECTOR_INIT_SWITCH(VEC_TC_JSON);
       UNIFORM_CONST_VECTOR_INIT_SWITCH(VEC_TC_GEO);
       UNIFORM_CONST_VECTOR_INIT_SWITCH(VEC_TC_UDT);
+      UNIFORM_CONST_VECTOR_INIT_SWITCH(VEC_TC_COLLECTION);
       UNIFORM_CONST_VECTOR_INIT_SWITCH(VEC_TC_ROARINGBITMAP);
 #undef UNIFORM_CONST_VECTOR_INIT_SWITCH
     default:
@@ -1331,6 +1393,9 @@ int eval_assign_question_mark_func(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &
         res_acc.precision_ = expr.datum_meta_.precision_;
         cast_ctx.res_accuracy_ = &res_acc;
       }
+      if (dst_meta.is_collection_sql_type()) {
+        dst_obj.meta_.set_meta(dst_meta);
+      }
       cast_ctx.exec_ctx_ = &ctx.exec_ctx_;
       if (OB_FAIL(ObObjCaster::to_type(dst_meta.get_type(), cast_ctx, v, dst_obj))) {
         LOG_WARN("failed to cast obj to dst type", K(ret), K(v), K(dst_meta));
diff --git a/src/sql/engine/expr/ob_expr.h b/src/sql/engine/expr/ob_expr.h
index
fb06189748..f804173300 100644 --- a/src/sql/engine/expr/ob_expr.h +++ b/src/sql/engine/expr/ob_expr.h @@ -117,6 +117,7 @@ public: && (common::T_EXT_SQL_ARRAY == static_cast(scale_))); } OB_INLINE common::ObObjType get_type() const { return type_; } + OB_INLINE uint16_t get_subschema_id() const { return static_cast(cs_type_); } }; // Expression evaluate result info @@ -504,7 +505,8 @@ public: return UINT32_MAX != vector_header_off_ && expr_default_eval_vector_func != eval_vector_func_; } - int cast_to_uniform(const int64_t size, ObEvalCtx &ctx) const; + int cast_to_uniform(const int64_t size, ObEvalCtx &ctx, const ObBitVector *skip = nullptr) const; + int nested_cast_to_uniform(const int64_t size, ObEvalCtx &ctx, const ObBitVector *skip) const; uint64_t get_batch_idx_mask(ObEvalCtx &ctx) { return batch_idx_mask_; } @@ -738,6 +740,10 @@ public: OB_INLINE void unset_null(ObEvalCtx &ctx, int64_t batch_idx) { get_nulls(ctx).unset(batch_idx); } + void reset_attr_datums_ptr(char *frame, const int64_t size); + void reset_attrs_datums(char *frame, const int64_t size) const; + OB_INLINE bool is_nested_expr() const { return attrs_cnt_ > 0; } + TO_STRING_KV("type", get_type_name(type_), K_(datum_meta), @@ -914,6 +920,8 @@ public: uint32_t null_bitmap_off_; VecValueTypeClass vec_value_tc_; int64_t local_session_var_id_; + ObExpr **attrs_; + uint32_t attrs_cnt_; }; // helper template to access ObExpr::extra_ @@ -1318,7 +1326,7 @@ OB_INLINE int ObExpr::eval_batch(ObEvalCtx &ctx, } else if (info.projected_ || NULL == eval_batch_func_) { // expr values is projected by child or has no evaluate func, do nothing. if (UINT32_MAX != vector_header_off_) { - ret = cast_to_uniform(size, ctx); + ret = cast_to_uniform(size, ctx, &skip); } } else if (size > 0) { ret = do_eval_batch(ctx, skip, size); @@ -1341,7 +1349,7 @@ OB_INLINE int ObExpr::eval_vector(ObEvalCtx &ctx, OB_INLINE VectorFormat ObExpr::get_default_res_format() const { return !batch_result_ ? 
VEC_UNIFORM_CONST - : (datum_meta_.type_ == ObNullType ? VEC_UNIFORM + : ((datum_meta_.type_ == ObNullType || datum_meta_.type_ == ObCollectionSQLType) ? VEC_UNIFORM : (is_fixed_length_data_ ? VEC_FIXED : VEC_DISCRETE)); } diff --git a/src/sql/engine/expr/ob_expr_add.cpp b/src/sql/engine/expr/ob_expr_add.cpp index 3c044a4d68..4611c46564 100644 --- a/src/sql/engine/expr/ob_expr_add.cpp +++ b/src/sql/engine/expr/ob_expr_add.cpp @@ -21,6 +21,7 @@ #include "sql/engine/expr/ob_batch_eval_util.h" #include "sql/resolver/expr/ob_raw_expr_util.h" #include "sql/engine/expr/ob_expr_util.h" +#include "sql/engine/expr/ob_array_expr_utils.h" namespace oceanbase @@ -61,6 +62,30 @@ int ObExprAdd::calc_result_type2(ObExprResType &type, } else if (type.is_decimal_int() && (type1.is_null() || type2.is_null())) { type.set_scale(MAX(type1.get_scale(), type2.get_scale())); type.set_precision(MAX(type1.get_precision(), type2.get_precision())); + } else if (type.is_collection_sql_type()) { + if (type1.is_collection_sql_type() && type2.is_collection_sql_type()) { + ObSQLSessionInfo *session = const_cast(type_ctx.get_session()); + ObExecContext *exec_ctx = OB_ISNULL(session) ? 
NULL : session->get_cur_exec_ctx(); + ObExprResType coll_calc_type = type; + if (OB_FAIL(ObExprResultTypeUtil::get_array_calc_type(exec_ctx, type1, type2, coll_calc_type))) { + LOG_WARN("failed to check array compatibilty", K(ret)); + } else { + type1.set_calc_meta(coll_calc_type); + type2.set_calc_meta(coll_calc_type); + type.set_collection(coll_calc_type.get_subschema_id()); + } + } else { + // only support vector/array/varchar + vector/array/varchar now // array and varchar need cast to array(float) + uint16_t res_subschema_id = UINT16_MAX; + if (OB_FAIL(ObArrayExprUtils::calc_cast_type2(type1, type2, type_ctx, res_subschema_id))) { + LOG_WARN("failed to calc cast type", K(ret), K(type1)); + } else if (UINT16_MAX == res_subschema_id) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected result subschema_id", K(ret)); + } else { + type.set_collection(res_subschema_id); + } + } } else if (OB_UNLIKELY(SCALE_UNKNOWN_YET == type1.get_scale()) || OB_UNLIKELY(SCALE_UNKNOWN_YET == type2.get_scale())) { type.set_scale(NUMBER_SCALE_UNKNOWN_YET); @@ -483,7 +508,6 @@ int ObExprAdd::cg_expr(ObExprCGCtx &op_cg_ctx, int ret = OB_SUCCESS; UNUSED(raw_expr); - UNUSED(op_cg_ctx); if (rt_expr.arg_cnt_ != 2 || OB_ISNULL(rt_expr.args_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("count of children is not 2 or children is null", K(ret), K(rt_expr.arg_cnt_), @@ -680,6 +704,41 @@ int ObExprAdd::cg_expr(ObExprCGCtx &op_cg_ctx, break; } break; + case ObCollectionSQLType: { + ObExecContext *exec_ctx = op_cg_ctx.session_->get_cur_exec_ctx(); + const uint16_t sub_id = rt_expr.obj_meta_.get_subschema_id(); + ObObjType elem_type; + uint32_t unused; + bool is_vec = false; + if (OB_FAIL(ObArrayExprUtils::get_array_element_type(exec_ctx, sub_id, elem_type, unused, is_vec))) { + LOG_WARN("failed to get collection elem type", K(ret), K(sub_id)); + } else if (elem_type == ObTinyIntType) { + SET_ADD_FUNC_PTR(add_collection_collection_int8_t); + rt_expr.eval_vector_func_ = 
add_collection_collection_int8_t_vector; + } else if (elem_type == ObSmallIntType) { + SET_ADD_FUNC_PTR(add_collection_collection_int16_t); + rt_expr.eval_vector_func_ = add_collection_collection_int16_t_vector; + } else if (elem_type == ObInt32Type) { + SET_ADD_FUNC_PTR(add_collection_collection_int32_t); + rt_expr.eval_vector_func_ = add_collection_collection_int32_t_vector; + } else if (elem_type == ObIntType) { + SET_ADD_FUNC_PTR(add_collection_collection_int64_t); + rt_expr.eval_vector_func_ = add_collection_collection_int64_t_vector; + } else if (elem_type == ObFloatType) { + SET_ADD_FUNC_PTR(add_collection_collection_float); + rt_expr.eval_vector_func_ = add_collection_collection_float_vector; + } else if (elem_type == ObDoubleType) { + SET_ADD_FUNC_PTR(add_collection_collection_double); + rt_expr.eval_vector_func_ = add_collection_collection_double_vector; + } else if (elem_type == ObUInt64Type) { + SET_ADD_FUNC_PTR(add_collection_collection_uint64_t); + rt_expr.eval_vector_func_ = add_collection_collection_uint64_t_vector; + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("invalid element type for array operation", K(ret), K(elem_type)); + } + break; + } default: break; } @@ -1688,6 +1747,83 @@ DECINC_ADD_EVAL_FUNC_ORA_DECL(int128) #undef DECINC_ADD_EVAL_FUNC_ORA_DECL +template +struct ObArrayAddFunc : public ObNestedArithOpBaseFunc +{ + int operator()(ObIArrayType &res, const ObIArrayType &l, const ObIArrayType &r) const + { + int ret = OB_SUCCESS; + + if (l.get_format() != r.get_format() || res.get_format() != r.get_format()) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("nested type is mismatch", K(ret), K(l.get_format()), K(r.get_format()), K(res.get_format())); + } else if (l.size() != r.size()) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("nested size is mismatch", K(ret), K(l.size()), K(r.size())); + } else if (l.get_format() != ArrayFormat::Vector && MEMCMP(l.get_nullbitmap(), r.get_nullbitmap(), sizeof(uint8_t) * l.size())) { + ret = 
OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("nested nullbitmap is mismatch", K(ret)); + } else if (l.get_format() == ArrayFormat::Nested_Array) { + // compare array dimension + const ObArrayNested &left = static_cast(l); + const ObArrayNested &right = static_cast(r); + ObArrayNested &nest_res = static_cast(res); + if (MEMCMP(left.get_nullbitmap(), right.get_nullbitmap(), sizeof(uint8_t) * left.size()) != 0) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("nested nullbitmap is mismatch", K(ret)); + } else if (MEMCMP(left.get_offsets(), right.get_offsets(), sizeof(uint32_t) * left.size()) != 0) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("nested offsets is mismatch", K(ret)); + } else if (OB_FAIL(res.set_null_bitmaps(left.get_nullbitmap(), left.size()))) { + LOG_WARN("nested nullbitmap copy failed", K(ret)); + } else if (OB_FAIL(res.set_offsets(left.get_offsets(), left.size()))) { + LOG_WARN("nested offset copy failed", K(ret)); + } else if (OB_FAIL(operator()(*nest_res.get_child_array(), *left.get_child_array(), *right.get_child_array()))) { + LOG_WARN("nested child array add failed", K(ret)); + } + } else if (l.get_format() != ArrayFormat::Fixed_Size && l.get_format() != ArrayFormat::Vector) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("invaid array type", K(ret), K(l.get_format())); + } else { + T *res_data = NULL; + if (l.get_format() != ArrayFormat::Vector && OB_FAIL(res.set_null_bitmaps(l.get_nullbitmap(), l.size()))) { + LOG_WARN("array nullbitmap copy failed", K(ret)); + } else if (OB_FAIL(static_cast &>(res).get_reserved_data(l.size(), res_data))) { + LOG_WARN("array get resered data failed", K(ret)); + } else { + T *left_data = reinterpret_cast(l.get_data()); + T *right_data = reinterpret_cast(r.get_data()); + for (int64_t i = 0; i < l.size(); ++i) { + res_data[i] = left_data[i] + right_data[i]; + } + } + } + return ret; + } +}; + + +#define COLLECTION_ADD_EVAL_FUNC_DECL(TYPE) \ +int ObExprAdd::add_collection_collection_##TYPE(EVAL_FUNC_ARG_DECL) \ +{ 
\ + return def_arith_eval_func>>(EVAL_FUNC_ARG_LIST, expr, ctx); \ +} \ +int ObExprAdd::add_collection_collection_##TYPE##_batch(BATCH_EVAL_FUNC_ARG_DECL) \ +{ \ + return def_batch_arith_op_by_datum_func>>(BATCH_EVAL_FUNC_ARG_LIST, expr, ctx); \ +} \ +int ObExprAdd::add_collection_collection_##TYPE##_vector(VECTOR_EVAL_FUNC_ARG_DECL) \ +{ \ + return def_nested_vector_arith_op_func>>(VECTOR_EVAL_FUNC_ARG_LIST, expr, ctx); \ +} + +COLLECTION_ADD_EVAL_FUNC_DECL(int8_t) +COLLECTION_ADD_EVAL_FUNC_DECL(int16_t) +COLLECTION_ADD_EVAL_FUNC_DECL(int32_t) +COLLECTION_ADD_EVAL_FUNC_DECL(int64_t) +COLLECTION_ADD_EVAL_FUNC_DECL(uint64_t) +COLLECTION_ADD_EVAL_FUNC_DECL(float) +COLLECTION_ADD_EVAL_FUNC_DECL(double) } } diff --git a/src/sql/engine/expr/ob_expr_add.h b/src/sql/engine/expr/ob_expr_add.h index e59812c700..ebc766fbda 100644 --- a/src/sql/engine/expr/ob_expr_add.h +++ b/src/sql/engine/expr/ob_expr_add.h @@ -305,10 +305,31 @@ public: static int add_decimalint32_oracle_vector(VECTOR_EVAL_FUNC_ARG_DECL); static int add_decimalint64_oracle(EVAL_FUNC_ARG_DECL); static int add_decimalint64_oracle_batch(BATCH_EVAL_FUNC_ARG_DECL); - static int add_decimalint64_oracle_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int add_decimalint64_oracle_vector(VECTOR_EVAL_FUNC_ARG_DECL); static int add_decimalint128_oracle(EVAL_FUNC_ARG_DECL); static int add_decimalint128_oracle_batch(BATCH_EVAL_FUNC_ARG_DECL); static int add_decimalint128_oracle_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int add_collection_collection_int8_t(EVAL_FUNC_ARG_DECL); + static int add_collection_collection_int8_t_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int add_collection_collection_int8_t_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int add_collection_collection_int16_t(EVAL_FUNC_ARG_DECL); + static int add_collection_collection_int16_t_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int add_collection_collection_int16_t_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int 
add_collection_collection_int32_t(EVAL_FUNC_ARG_DECL); + static int add_collection_collection_int32_t_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int add_collection_collection_int32_t_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int add_collection_collection_int64_t(EVAL_FUNC_ARG_DECL); + static int add_collection_collection_int64_t_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int add_collection_collection_int64_t_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int add_collection_collection_float(EVAL_FUNC_ARG_DECL); + static int add_collection_collection_float_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int add_collection_collection_float_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int add_collection_collection_double(EVAL_FUNC_ARG_DECL); + static int add_collection_collection_double_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int add_collection_collection_double_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int add_collection_collection_uint64_t(EVAL_FUNC_ARG_DECL); + static int add_collection_collection_uint64_t_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int add_collection_collection_uint64_t_vector(VECTOR_EVAL_FUNC_ARG_DECL); private: static ObArithFunc add_funcs_[common::ObMaxTC]; diff --git a/src/sql/engine/expr/ob_expr_arithmetic_result_type.map b/src/sql/engine/expr/ob_expr_arithmetic_result_type.map index 35dc4e4806..d25ad45b3f 100644 --- a/src/sql/engine/expr/ob_expr_arithmetic_result_type.map +++ b/src/sql/engine/expr/ob_expr_arithmetic_result_type.map @@ -53,7 +53,7 @@ static constexpr ObObjType ARITH_RESULT_TYPE[ObMaxType][ObMaxType] = ObMaxType, /* ObGeometryType */ ObMaxType, /* ObUserDefinedSqlType */ ObDoubleType, /* ObDecimalIntType */ - ObMaxType, /* ObCollectionSQLType*/ + ObCollectionSQLType, /* ObCollectionSQLType*/ ObDoubleType, /* ObMySQLDateType */ ObDoubleType, /* ObMySQLDateTimeType */ ObMaxType, /* ObRoaringBitmapType */ @@ -1351,7 +1351,7 @@ static constexpr ObObjType ARITH_RESULT_TYPE[ObMaxType][ObMaxType] = ObMaxType, /* ObGeometryType */ ObMaxType, 
/* ObUserDefinedSqlType */ ObDoubleType, /* ObDecimalIntType */ - ObMaxType, /* ObCollectionSQLType*/ + ObCollectionSQLType, /* ObCollectionSQLType*/ ObDoubleType, /* ObMySQLDateType */ ObDoubleType, /* ObMySQLDateTimeType */ ObMaxType, /* ObRoaringBitmapType */ @@ -2986,7 +2986,7 @@ static constexpr ObObjType ARITH_RESULT_TYPE[ObMaxType][ObMaxType] = }, /*ObCollectionSqlType*/ { - ObMaxType, /* NullType */ + ObCollectionSQLType, /* NullType */ ObMaxType, /* TinyIntType */ ObMaxType, /* SmallIntType */ ObMaxType, /* MediumIntType */ @@ -3008,7 +3008,7 @@ static constexpr ObObjType ARITH_RESULT_TYPE[ObMaxType][ObMaxType] = ObMaxType, /* DateType */ ObMaxType, /* TimeType */ ObMaxType, /* YearType */ - ObMaxType, /* VarcharType */ + ObCollectionSQLType, /* VarcharType */ ObMaxType, /* CharType */ ObMaxType, /* HexStringType */ ObMaxType, /* ExtendType */ @@ -3037,7 +3037,7 @@ static constexpr ObObjType ARITH_RESULT_TYPE[ObMaxType][ObMaxType] = ObMaxType, /* ObGeometryType */ ObMaxType, /* ObUserDefinedSqlType */ ObMaxType, /* ObDecimalIntType */ - ObMaxType, /* ObCollectionSQLType*/ + ObCollectionSQLType, /* ObCollectionSQLType*/ ObMaxType, /* ObMySQLDateType */ ObMaxType, /* ObMySQLDateTimeType */ ObMaxType, /* ObRoaringBitmapType */ diff --git a/src/sql/engine/expr/ob_expr_array.cpp b/src/sql/engine/expr/ob_expr_array.cpp new file mode 100644 index 0000000000..b8c85469af --- /dev/null +++ b/src/sql/engine/expr/ob_expr_array.cpp @@ -0,0 +1,251 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ * This file contains implementation for array.
+ */
+
+#define USING_LOG_PREFIX SQL_ENG
+#include "sql/engine/expr/ob_expr_array.h"
+#include "lib/udt/ob_collection_type.h"
+#include "lib/udt/ob_array_type.h"
+#include "sql/engine/expr/ob_expr_lob_utils.h"
+#include "sql/engine/expr/ob_array_expr_utils.h"
+#include "sql/engine/ob_exec_context.h"
+
+
+using namespace oceanbase::common;
+using namespace oceanbase::sql;
+using namespace oceanbase::omt;
+
+namespace oceanbase
+{
+namespace sql
+{
+ObExprArray::ObExprArray(ObIAllocator &alloc)
+    : ObFuncExprOperator(alloc, T_FUN_SYS_ARRAY, N_ARRAY, PARAM_NUM_UNKNOWN, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION)
+{
+}
+// Overload that forwards a caller-chosen operator type, name, parameter count and dimension to ObFuncExprOperator.
+ObExprArray::ObExprArray(ObIAllocator &alloc,
+                         ObExprOperatorType type,
+                         const char *name,
+                         int32_t param_num,
+                         int32_t dimension) : ObFuncExprOperator(alloc, type, name, param_num, VALID_FOR_GENERATED_COL, dimension)
+{
+}
+// Trivial destructor.
+ObExprArray::~ObExprArray()
+{
+}
+// Deduce one element type for all param_num arguments and set `type` to the array collection with the matching subschema id.
+int ObExprArray::calc_result_typeN(ObExprResType& type,
+                                   ObExprResType* types_stack,
+                                   int64_t param_num,
+                                   ObExprTypeCtx& type_ctx) const
+{
+  int ret = OB_SUCCESS;
+  ObSQLSessionInfo *session = const_cast<ObSQLSessionInfo *>(type_ctx.get_session());
+  ObExecContext *exec_ctx = OB_ISNULL(session) ? NULL : session->get_cur_exec_ctx();
+  ObDataType elem_type;
+  uint16_t subschema_id = UINT16_MAX; // never read uninitialized; only consumed when ret is OB_SUCCESS
+  if (OB_ISNULL(exec_ctx)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("exec ctx is null", K(ret));
+  } else if (param_num > MAX_ARRAY_ELEMENT_SIZE) {
+    ret = OB_SIZE_OVERFLOW;
+    OB_LOG(WARN, "array element size exceed max", K(ret), K(param_num), K(MAX_ARRAY_ELEMENT_SIZE));
+  } else if (OB_FAIL(ObArrayExprUtils::deduce_array_element_type(exec_ctx, types_stack, param_num, elem_type))) {
+    LOG_WARN("failed to deduce array element type", K(ret));
+  } else if (ob_is_collection_sql_type(elem_type.get_obj_type())) {
+    for (int64_t i = 0; i < param_num; ++i) { // arguments whose subschema differs get the deduced calc meta
+      if (types_stack[i].get_subschema_id() != elem_type.meta_.get_subschema_id()) {
+        types_stack[i].set_calc_meta(elem_type.meta_);
+      }
+    }
+    if (OB_FAIL(ObArrayExprUtils::deduce_nested_array_subschema_id(exec_ctx, elem_type, subschema_id))) {
+      LOG_WARN("failed to deduce nested array subschema id", K(ret));
+    }
+  } else if (OB_FAIL(exec_ctx->get_subschema_id_by_collection_elem_type(ObNestedType::OB_ARRAY_TYPE,
+                                                                        elem_type, subschema_id))) {
+    LOG_WARN("failed to get collection subschema id", K(ret));
+  }
+  if (OB_SUCC(ret)) {
+    type.set_collection(subschema_id);
+  }
+  return ret;
+}
+// Append `datum` (or a null slot) to arr_obj viewed as ObArrayFixedSize<Element_Type>; uses arr_obj, datum, ret and i from the expansion site.
+#define FIXED_SIZE_ARRAY_APPEND(Element_Type, Get_Func)                                                 \
+  ObArrayFixedSize<Element_Type> *array_obj = static_cast<ObArrayFixedSize<Element_Type> *>(arr_obj);  \
+  if (datum->is_null()) {                                                                              \
+    if (OB_FAIL(array_obj->push_back(0, true))) {                                                      \
+      LOG_WARN("failed to push back null value", K(ret), K(i));                                        \
+    }                                                                                                  \
+  } else if (OB_FAIL(array_obj->push_back(datum->Get_Func()))) {                                       \
+    LOG_WARN("failed to push back value", K(ret), K(i));                                               \
+  }
+// Evaluate each argument, append it as one element of a newly constructed array object, then store the array's binary form in `res`.
+int ObExprArray::eval_array(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res)
+{
+  int ret = OB_SUCCESS;
+  ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx);
+  common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator();
+  ObSubSchemaValue value;
+  uint16_t subschema_id = expr.obj_meta_.get_subschema_id();
+  const ObSqlCollectionInfo *coll_info = NULL;
+  if (OB_FAIL(ctx.exec_ctx_.get_sqludt_meta_by_subschema_id(subschema_id, value))) {
+    LOG_WARN("failed to get subschema ctx", K(ret));
+  } else if (value.type_ >= OB_SUBSCHEMA_MAX_TYPE) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("invalid subschema type", K(ret), K(value));
+  } else {
+    ObIArrayType *arr_obj = NULL;
+    coll_info = reinterpret_cast<const ObSqlCollectionInfo *>(value.value_);
+    ObCollectionArrayType *arr_type = OB_ISNULL(coll_info) ? NULL : static_cast<ObCollectionArrayType *>(coll_info->collection_meta_);
+    if (OB_ISNULL(arr_type) || OB_ISNULL(arr_type->element_type_)) { // checked before any dereference below
+      ret = OB_ERR_NULL_VALUE;
+      LOG_WARN("collection info or array type meta is null", K(ret), K(subschema_id));
+    } else if (OB_FAIL(ObArrayTypeObjFactory::construct(tmp_allocator, *arr_type, arr_obj))) {
+      LOG_WARN("construct array obj failed", K(ret), K(subschema_id), K(coll_info));
+    } else {
+      int num_args = expr.arg_cnt_;
+      // arr_type and its element_type_ were validated above; append the arguments in order and stop at the first error.
+      for (int i = 0; i < num_args && OB_SUCC(ret); i++) {
+        ObDatum *datum = NULL;
+        if (OB_FAIL(expr.args_[i]->eval(ctx, datum))) {
+          LOG_WARN("failed to eval args", K(ret), K(i));
+        } else {
+          if (arr_type->element_type_->type_id_ == ObNestedType::OB_BASIC_TYPE) {
+            ObCollectionBasicType *elem_type = static_cast<ObCollectionBasicType *>(arr_type->element_type_);
+            switch (elem_type->basic_meta_.get_obj_type()) {
+              case ObNullType: {
+                ObArrayFixedSize<int8_t> *null_array = static_cast<ObArrayFixedSize<int8_t> *>(arr_obj); // NOTE(review): element type was lost in this copy of the patch; int8_t presumed - confirm against ObArrayTypeObjFactory
+                if (!datum->is_null()) {
+                  ret = OB_ERR_UNEXPECTED;
+                  LOG_WARN("expect null value", K(ret), K(*datum));
+                } else if (OB_FAIL(null_array->push_null())) {
+                  LOG_WARN("failed to push back null value", K(ret), K(i));
+                }
+                break;
+              }
+              case ObTinyIntType: {
+                FIXED_SIZE_ARRAY_APPEND(int8_t, get_tinyint);
+                break;
+              }
+              case ObInt32Type: {
+                FIXED_SIZE_ARRAY_APPEND(int32_t, get_int32);
+                break;
+              }
+              case ObIntType: {
+                FIXED_SIZE_ARRAY_APPEND(int64_t, get_int);
+                break;
+              }
+              case ObUInt64Type: {
+                FIXED_SIZE_ARRAY_APPEND(uint64_t, get_uint64);
+                break;
+              }
+              case ObFloatType: {
+                FIXED_SIZE_ARRAY_APPEND(float, get_float);
+                break;
+              }
+              case ObDoubleType: {
+                FIXED_SIZE_ARRAY_APPEND(double, get_double);
+                break;
+              }
+              case ObVarcharType: {
+                ObArrayBinary *binary_array = static_cast<ObArrayBinary *>(arr_obj);
+                if (datum->is_null()) {
+                  if (OB_FAIL(binary_array->push_back(ObString(), true))) {
+                    LOG_WARN("failed to push back null value", K(ret), K(i));
+                  }
+                } else if (OB_FAIL(binary_array->push_back(datum->get_string()))) {
+                  LOG_WARN("failed to push back value", K(ret), K(i));
+                }
+                break;
+              }
+              default:
+                ret = OB_NOT_SUPPORTED;
+                LOG_WARN("unsupported element type", K(ret), K(subschema_id), K(elem_type->basic_meta_.get_obj_type()));
+            }
+          } else if (arr_type->element_type_->type_id_ == ObNestedType::OB_ARRAY_TYPE) {
+            ObString raw_bin;
+            uint16_t elem_subid;
+            ObArrayNested *nest_array = static_cast<ObArrayNested *>(arr_obj);
+            if (datum->is_null()) {
+              if (OB_FAIL(nest_array->push_null())) {
+                LOG_WARN("failed to push back null value", K(ret), K(i));
+              }
+            } else if (FALSE_IT(raw_bin = datum->get_string())) {
+            } else if (FALSE_IT(elem_subid = expr.args_[i]->obj_meta_.get_subschema_id())) {
+            } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(tmp_allocator,
+                                                                         *datum,
+                                                                         expr.args_[i]->datum_meta_,
+                                                                         expr.args_[i]->obj_meta_.has_lob_header(),
+                                                                         raw_bin))) {
+              LOG_WARN("fail to get real string data", K(ret), K(raw_bin));
+            } else if (OB_FAIL(add_elem_to_nested_array(tmp_allocator, ctx, elem_subid, raw_bin, nest_array))) {
+              LOG_WARN("failed to push back value", K(ret), K(i));
+            }
+          }
+        }
+      }
+      if (OB_SUCC(ret)) {
+        ObString res_str;
+        if (OB_FAIL(ObArrayExprUtils::set_array_res(arr_obj, arr_obj->get_raw_binary_len(), expr, ctx, res_str))) {
+          LOG_WARN("get array binary string failed", K(ret), K(*coll_info));
+        } else {
+          res.set_string(res_str);
+        }
+      }
+    }
+  }
+  return ret;
+}
+// Construct a child array for `subschema_id`, initialise it from `raw_bin`, and push it onto `nest_array`.
+int ObExprArray::add_elem_to_nested_array(ObIAllocator &tmp_allocator, ObEvalCtx &ctx, uint16_t subschema_id,
+                                          ObString &raw_bin, ObArrayNested *nest_array)
+{
+  int ret = OB_SUCCESS;
+  ObSubSchemaValue value;
+  if (OB_FAIL(ctx.exec_ctx_.get_sqludt_meta_by_subschema_id(subschema_id, value))) {
+    LOG_WARN("failed to get subschema ctx", K(ret));
+  } else if (value.type_ >= OB_SUBSCHEMA_MAX_TYPE) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("invalid subschema type", K(ret), K(value));
+  } else {
+    ObIArrayType *arr_obj = NULL;
+    const ObSqlCollectionInfo *coll_info = reinterpret_cast<const ObSqlCollectionInfo *>(value.value_);
+    ObCollectionArrayType *arr_type = OB_ISNULL(coll_info) ? NULL : static_cast<ObCollectionArrayType *>(coll_info->collection_meta_);
+    if (OB_ISNULL(arr_type)) { // covers a null coll_info as well as a null collection_meta_
+      ret = OB_ERR_NULL_VALUE;
+      LOG_WARN("collection info or array type meta is null", K(ret), K(subschema_id));
+    } else if (OB_FAIL(ObArrayTypeObjFactory::construct(tmp_allocator, *arr_type, arr_obj))) {
+      LOG_WARN("construct array obj failed", K(ret), K(subschema_id), K(coll_info));
+    } else if (OB_FAIL(arr_obj->init(raw_bin))) {
+      LOG_WARN("failed to init array", K(ret));
+    } else if (OB_FAIL(nest_array->push_back(*arr_obj))) {
+      LOG_WARN("failed to push back array", K(ret));
+    }
+  }
+  return ret;
+}
+// Code generation: the runtime expr is always evaluated by eval_array.
+int ObExprArray::cg_expr(ObExprCGCtx &expr_cg_ctx,
+                         const ObRawExpr &raw_expr,
+                         ObExpr &rt_expr) const
+{
+  UNUSED(expr_cg_ctx);
+  UNUSED(raw_expr);
+  rt_expr.eval_func_ = eval_array;
+  return OB_SUCCESS;
+}
+
+} // namespace sql
+} // namespace oceanbase
diff --git a/src/sql/engine/expr/ob_expr_array.h b/src/sql/engine/expr/ob_expr_array.h
new file mode 100644
index 0000000000..b8a3e19302
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_array.h
@@ -0,0 +1,53 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ * This file contains implementation for array. + */ + +#ifndef OCEANBASE_SQL_OB_EXPR_ARRAY +#define OCEANBASE_SQL_OB_EXPR_ARRAY + +#include "sql/engine/expr/ob_expr_operator.h" +#include "lib/geo/ob_geo_utils.h" +#include "lib/udt/ob_array_type.h" + + +namespace oceanbase +{ +namespace sql +{ +class ObExprArray : public ObFuncExprOperator +{ +public: + explicit ObExprArray(common::ObIAllocator &alloc); + explicit ObExprArray(common::ObIAllocator &alloc, ObExprOperatorType type, + const char *name, int32_t param_num, int32_t dimension); + virtual ~ObExprArray(); + virtual int calc_result_typeN(ObExprResType& type, + ObExprResType* types, + int64_t param_num, + common::ObExprTypeCtx& type_ctx) + const override; + static int eval_array(const ObExpr &expr, + ObEvalCtx &ctx, + ObDatum &res); + virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const override; +private: + + static int add_elem_to_nested_array(ObIAllocator &tmp_allocator, ObEvalCtx &ctx, uint16_t subschema_id, + ObString &raw_bin, ObArrayNested *nest_array); + DISALLOW_COPY_AND_ASSIGN(ObExprArray); +}; + +} // sql +} // oceanbase +#endif // OCEANBASE_SQL_OB_EXPR_ARRAY \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_array_contains.cpp b/src/sql/engine/expr/ob_expr_array_contains.cpp new file mode 100644 index 0000000000..7e070663a3 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_array_contains.cpp @@ -0,0 +1,497 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ * This file contains implementation for array. + */ + +#define USING_LOG_PREFIX SQL_ENG +#include "sql/engine/expr/ob_expr_array_contains.h" +#include "lib/udt/ob_collection_type.h" +#include "lib/udt/ob_array_type.h" +#include "sql/engine/expr/ob_expr_lob_utils.h" +#include "sql/engine/expr/ob_array_expr_utils.h" +#include "sql/engine/ob_exec_context.h" +#include "sql/engine/expr/ob_expr_result_type_util.h" + + +using namespace oceanbase::common; +using namespace oceanbase::sql; +using namespace oceanbase::omt; + +namespace oceanbase +{ +namespace sql +{ +ObExprArrayContains::ObExprArrayContains(ObIAllocator &alloc) + : ObFuncExprOperator(alloc, T_FUNC_SYS_ARRAY_CONTAINS, N_ARRAY_CONTAINS, 2, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) +{ +} + +ObExprArrayContains::ObExprArrayContains(ObIAllocator &alloc, + ObExprOperatorType type, + const char *name, + int32_t param_num, + int32_t dimension) : ObFuncExprOperator(alloc, type, name, param_num, VALID_FOR_GENERATED_COL, dimension) +{ +} + +ObExprArrayContains::~ObExprArrayContains() +{ +} + +int ObExprArrayContains::calc_result_type2(ObExprResType &type, + ObExprResType &type1, + ObExprResType &type2, + common::ObExprTypeCtx &type_ctx) const +{ + int ret = OB_SUCCESS; + ObSQLSessionInfo *session = const_cast(type_ctx.get_session()); + ObExecContext *exec_ctx = OB_ISNULL(session) ? 
NULL : session->get_cur_exec_ctx(); + ObSubSchemaValue arr_meta; + const ObSqlCollectionInfo *coll_info = NULL; + ObExprResType *type1_ptr = &type1; + ObExprResType *type2_ptr = &type2; + uint16_t subschema_id; + if (OB_ISNULL(exec_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("exec ctx is null", K(ret)); + } else if (OB_ISNULL(type_ctx.get_raw_expr())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("raw expr is null", K(ret)); + } else if (type_ctx.get_raw_expr()->get_extra() != 0) { + // It's any operator ,param order is reversed + ObExprResType *type_tmp = type2_ptr; + type2_ptr = type1_ptr; + type1_ptr = type_tmp; + } + + if (OB_FAIL(ret)) { + } else if (!ob_is_collection_sql_type(type1_ptr->get_type())) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_USER_ERROR(OB_ERR_INVALID_TYPE_FOR_OP, ob_obj_type_str(type1_ptr->get_type()), ob_obj_type_str(type2_ptr->get_type())); + } else if (type2_ptr->is_null()) { + // do nothing + } else if (OB_FAIL(exec_ctx->get_sqludt_meta_by_subschema_id(type1_ptr->get_subschema_id(), arr_meta))) { + LOG_WARN("failed to get elem meta.", K(ret), K(type1_ptr->get_subschema_id())); + } else if (arr_meta.type_ != ObSubSchemaType::OB_SUBSCHEMA_COLLECTION_TYPE) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("invalid subschema type", K(ret), K(arr_meta.type_)); + } else if (OB_ISNULL(coll_info = static_cast(arr_meta.value_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("coll info is null", K(ret)); + } else if (!ob_is_collection_sql_type(type2_ptr->get_type())) { + ObCollectionArrayType *arr_type = static_cast(coll_info->collection_meta_); + ObCollectionTypeBase *elem_type = arr_type->element_type_; + if (elem_type->type_id_ == ObNestedType::OB_BASIC_TYPE) { + if (ob_obj_type_class(type2_ptr->get_type()) != static_cast(elem_type)->basic_meta_.get_type_class()) { + ObObjType calc_type = type2_ptr->get_type(); + if (type2_ptr->get_type() == ObDecimalIntType) { + calc_type = ObDoubleType; + if (get_decimalint_type(type2_ptr->get_precision()) == 
DECIMAL_INT_32) { + calc_type = ObFloatType; + } + } + if (calc_type == static_cast(elem_type)->basic_meta_.get_obj_type()) { + type2_ptr->set_calc_type(calc_type); + } else { + uint32_t depth = 0; + ObDataType coll_elem1_type; + ObExprResType deduce_type; + bool is_vec = false; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObArrayExprUtils::get_array_element_type(exec_ctx, type1_ptr->get_subschema_id(), coll_elem1_type, depth, is_vec))) { + LOG_WARN("failed to get array element type", K(ret)); + } else if (OB_FAIL(ObExprResultTypeUtil::get_array_calc_type(exec_ctx, coll_elem1_type.get_obj_type(), calc_type, + depth, deduce_type, calc_type))) { + LOG_WARN("failed to get array calc type", K(ret)); + } else { + type1_ptr->set_calc_meta(deduce_type); + type2_ptr->set_calc_type(calc_type); + } + } + } + } else { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("invalid obj type", K(ret), K(*coll_info), K(type2_ptr->get_type())); + } + } else { + // type2_ptr->is array + ObString child_def; + uint16_t child_subschema_id; + ObExprResType child_type; + ObExprResType coll_calc_type; + if (OB_FAIL(coll_info->get_child_def_string(child_def))) { + LOG_WARN("failed to get type1 child define", K(ret), K(*coll_info)); + } else if (OB_FAIL(exec_ctx->get_subschema_id_by_type_string(child_def, child_subschema_id))) { + LOG_WARN("failed to get type1 child subschema id", K(ret), K(*coll_info), K(child_def)); + } else if (child_subschema_id == type2_ptr->get_subschema_id()) { + // do nothing + } else if (FALSE_IT(child_type.set_collection(child_subschema_id))) { + } else if (OB_FAIL(ObExprResultTypeUtil::get_array_calc_type(exec_ctx, child_type, *type2_ptr, coll_calc_type))) { + LOG_WARN("failed to check array compatibilty", K(ret)); + } else { + if (type2_ptr->get_subschema_id() != coll_calc_type.get_subschema_id()) { + type2_ptr->set_calc_meta(coll_calc_type); + } + if (child_type.get_subschema_id() != coll_calc_type.get_subschema_id()) { + ObDataType child_calc_type; + uint16_t 
type1_calc_id; + child_calc_type.meta_.set_collection(coll_calc_type.get_subschema_id()); + if (OB_FAIL(ObArrayExprUtils::deduce_nested_array_subschema_id(exec_ctx, child_calc_type, type1_calc_id))) { + LOG_WARN("failed to deduce nested array subschema id", K(ret)); + } else { + coll_calc_type.set_collection(type1_calc_id); + type1_ptr->set_calc_meta(coll_calc_type); + } + } + } + } + if (OB_SUCC(ret)) { + type.set_int32(); + type.set_scale(common::ObAccuracy::DDL_DEFAULT_ACCURACY[common::ObIntType].scale_); + type.set_precision(common::ObAccuracy::DDL_DEFAULT_ACCURACY[common::ObIntType].precision_); + } + + return ret; +} + +#define EVAL_FUNC_ARRAY_CONTAINS(TYPE, GET_FUNC) \ + int ObExprArrayContains::eval_array_contains_##TYPE(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) \ + { \ + int ret = OB_SUCCESS; \ + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); \ + common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); \ + uint32_t p0 = expr.extra_ == 1 ? 1 : 0; \ + uint32_t p1 = expr.extra_ == 1 ? 
0 : 1; \ + const uint16_t meta_id = expr.args_[p0]->obj_meta_.get_subschema_id(); \ + ObIArrayType *arr_obj = NULL; \ + ObDatum *datum = NULL; \ + ObDatum *datum_val = NULL; \ + TYPE val; \ + bool bret = false; \ + if (OB_FAIL(expr.args_[p0]->eval(ctx, datum))) { \ + LOG_WARN("failed to eval args", K(ret)); \ + } else if (OB_FAIL(expr.args_[p1]->eval(ctx, datum_val))) { \ + LOG_WARN("failed to eval args", K(ret)); \ + } else if (OB_FAIL(ObArrayExprUtils::get_array_obj(tmp_allocator, ctx, meta_id, datum->get_string(), arr_obj))) { \ + LOG_WARN("construct array obj failed", K(ret)); \ + } else if (FALSE_IT(val = datum_val->GET_FUNC())) { \ + } else if (OB_FAIL(ObArrayUtil::contains(*arr_obj, val, bret))) { \ + LOG_WARN("array contains failed", K(ret)); \ + } else { \ + res.set_bool(bret); \ + } \ + return ret; \ + } + +EVAL_FUNC_ARRAY_CONTAINS(int64_t, get_int) +EVAL_FUNC_ARRAY_CONTAINS(float, get_float) +EVAL_FUNC_ARRAY_CONTAINS(double, get_double) +EVAL_FUNC_ARRAY_CONTAINS(ObString, get_string) + +int ObExprArrayContains::eval_array_contains_array(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res) +{ + int ret = OB_SUCCESS; + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); + uint32_t p0 = expr.extra_ == 1 ? 1 : 0; + uint32_t p1 = expr.extra_ == 1 ? 
0 : 1; + const uint16_t l_meta_id = expr.args_[p0]->obj_meta_.get_subschema_id(); + const uint16_t r_meta_id = expr.args_[p1]->obj_meta_.get_subschema_id(); + ObIArrayType *arr_obj = NULL; + ObIArrayType *arr_val = NULL; + ObDatum *datum = NULL; + ObDatum *datum_val = NULL; + bool bret = false; + if (OB_FAIL(expr.args_[p0]->eval(ctx, datum))) { + LOG_WARN("failed to eval args", K(ret)); + } else if (OB_FAIL(expr.args_[p1]->eval(ctx, datum_val))) { + LOG_WARN("failed to eval args", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::get_array_obj(tmp_allocator, ctx, l_meta_id, datum->get_string(), arr_obj))) { + LOG_WARN("construct array obj failed", K(ret)); + } else if (datum_val->is_null()) { + bool contains_null = arr_obj->contain_null(); + res.set_bool(contains_null); + } else if (OB_FAIL(ObArrayExprUtils::get_array_obj(tmp_allocator, ctx, r_meta_id, datum_val->get_string(), arr_val))) { + LOG_WARN("construct array obj failed", K(ret)); + } else if (OB_FAIL(ObArrayUtil::contains(*arr_obj, *arr_val, bret))) { + LOG_WARN("array contains failed", K(ret)); + } else { + res.set_bool(bret); + } + return ret; +} + +#define EVAL_FUNC_ARRAY_CONTAINS_BATCH(TYPE, GET_FUNC) \ + int ObExprArrayContains::eval_array_contains_batch_##TYPE( \ + const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const int64_t batch_size) \ + { \ + int ret = OB_SUCCESS; \ + ObDatumVector res_datum = expr.locate_expr_datumvector(ctx); \ + ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); \ + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); \ + common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); \ + uint32_t p0 = expr.extra_ == 1 ? 1 : 0; \ + uint32_t p1 = expr.extra_ == 1 ? 
0 : 1; \ + const uint16_t meta_id = expr.args_[p0]->obj_meta_.get_subschema_id(); \ + ObIArrayType *arr_obj = NULL; \ + if (OB_FAIL(expr.args_[p0]->eval_batch(ctx, skip, batch_size))) { \ + LOG_WARN("eval date_unit_datum failed", K(ret)); \ + } else if (OB_FAIL(expr.args_[p1]->eval_batch(ctx, skip, batch_size))) { \ + LOG_WARN("failed to eval batch result args0", K(ret)); \ + } else { \ + ObDatumVector src_array = expr.args_[p0]->locate_expr_datumvector(ctx); \ + ObDatumVector val_array = expr.args_[p1]->locate_expr_datumvector(ctx); \ + for (int64_t j = 0; OB_SUCC(ret) && j < batch_size; ++j) { \ + if (skip.at(j) || eval_flags.at(j)) { \ + continue; \ + } \ + eval_flags.set(j); \ + bool bret = false; \ + TYPE val; \ + if (OB_FAIL(ObArrayExprUtils::get_array_obj( \ + tmp_allocator, ctx, meta_id, src_array.at(j)->get_string(), arr_obj))) { \ + LOG_WARN("construct array obj failed", K(ret)); \ + } else if (FALSE_IT(val = val_array.at(j)->GET_FUNC())) { \ + } else if (OB_FAIL(ObArrayUtil::contains(*arr_obj, val, bret))) { \ + LOG_WARN("array contains failed", K(ret)); \ + } else { \ + res_datum.at(j)->set_bool(bret); \ + } \ + } \ + } \ + return ret; \ + } + +EVAL_FUNC_ARRAY_CONTAINS_BATCH(int64_t, get_int) +EVAL_FUNC_ARRAY_CONTAINS_BATCH(float, get_float) +EVAL_FUNC_ARRAY_CONTAINS_BATCH(double, get_double) +EVAL_FUNC_ARRAY_CONTAINS_BATCH(ObString, get_string) + +int ObExprArrayContains::eval_array_contains_array_batch(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const int64_t batch_size) +{ + int ret = OB_SUCCESS; + ObDatumVector res_datum = expr.locate_expr_datumvector(ctx); + ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); + uint32_t p0 = expr.extra_ == 1 ? 1 : 0; + uint32_t p1 = expr.extra_ == 1 ? 
0 : 1; + const uint16_t l_meta_id = expr.args_[p0]->obj_meta_.get_subschema_id(); + const uint16_t r_meta_id = expr.args_[p1]->obj_meta_.get_subschema_id(); + ObIArrayType *arr_obj = NULL; + ObIArrayType *arr_val = NULL; + if (OB_FAIL(expr.args_[p0]->eval_batch(ctx, skip, batch_size))) { + LOG_WARN("eval date_unit_datum failed", K(ret)); + } else if (OB_FAIL(expr.args_[p1]->eval_batch(ctx, skip, batch_size))) { + LOG_WARN("failed to eval batch result args0", K(ret)); + } else { + ObDatumVector src_array = expr.args_[p0]->locate_expr_datumvector(ctx); + ObDatumVector val_array = expr.args_[p1]->locate_expr_datumvector(ctx); + for (int64_t j = 0; OB_SUCC(ret) && j < batch_size; ++j) { + if (skip.at(j) || eval_flags.at(j)) { + continue; + } + eval_flags.set(j); + bool bret = false; + if (OB_FAIL( + ObArrayExprUtils::get_array_obj(tmp_allocator, ctx, l_meta_id, src_array.at(j)->get_string(), arr_obj))) { + LOG_WARN("construct array obj failed", K(ret)); + } else if (val_array.at(j)->is_null()) { + bool contains_null = arr_obj->contain_null(); + res_datum.at(j)->set_bool(contains_null); + } else if (OB_FAIL(ObArrayExprUtils::get_array_obj( + tmp_allocator, ctx, r_meta_id, val_array.at(j)->get_string(), arr_val))) { + LOG_WARN("construct array obj failed", K(ret)); + } else if (OB_FAIL(ObArrayUtil::contains(*arr_obj, *arr_val, bret))) { + LOG_WARN("array contains failed", K(ret)); + } else { + res_datum.at(j)->set_bool(bret); + } + } + } + return ret; +} + +#define EVAL_FUNC_ARRAY_CONTAINS_VECTOR(TYPE, GET_FUNC) \ + int ObExprArrayContains::eval_array_contains_vector_##TYPE( \ + const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const EvalBound &bound) \ + { \ + int ret = OB_SUCCESS; \ + uint32_t p0 = expr.extra_ == 1 ? 1 : 0; \ + uint32_t p1 = expr.extra_ == 1 ? 
0 : 1; \ + if (OB_FAIL(expr.args_[p0]->eval_vector(ctx, skip, bound)) || \ + OB_FAIL(expr.args_[p1]->eval_vector(ctx, skip, bound))) { \ + LOG_WARN("fail to eval params", K(ret)); \ + } else { \ + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); \ + common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); \ + ObIVector *left_vec = expr.args_[p0]->get_vector(ctx); \ + VectorFormat left_format = left_vec->get_format(); \ + ObIVector *right_vec = expr.args_[p1]->get_vector(ctx); \ + const uint16_t meta_id = expr.args_[p0]->obj_meta_.get_subschema_id(); \ + ObIVector *res_vec = expr.get_vector(ctx); \ + ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); \ + ObIArrayType *arr_obj = NULL; \ + TYPE val; \ + for (int64_t idx = bound.start(); OB_SUCC(ret) && idx < bound.end(); ++idx) { \ + if (skip.at(idx) || eval_flags.at(idx)) { \ + continue; \ + } else if (left_format == VEC_UNIFORM || left_format == VEC_UNIFORM_CONST) { \ + ObString left = left_vec->get_string(idx); \ + if (OB_FAIL(ObNestedVectorFunc::construct_param(tmp_allocator, ctx, meta_id, left, arr_obj))) { \ + LOG_WARN("construct array obj failed", K(ret)); \ + } \ + } else if (OB_FAIL(ObNestedVectorFunc::construct_attr_param( \ + tmp_allocator, ctx, *expr.args_[p0], meta_id, idx, arr_obj))) { \ + LOG_WARN("construct array obj failed", K(ret)); \ + } \ + bool bret = false; \ + if (OB_FAIL(ret)) { \ + } else if (right_vec->is_null(idx)) { \ + bool contains_null = arr_obj->contain_null(); \ + res_vec->set_bool(idx, contains_null); \ + eval_flags.set(idx); \ + } else if (FALSE_IT(val = right_vec->GET_FUNC(idx))) { \ + } else if (OB_FAIL(ObArrayUtil::contains(*arr_obj, val, bret))) { \ + LOG_WARN("array contains failed", K(ret)); \ + } else { \ + res_vec->set_bool(idx, bret); \ + eval_flags.set(idx); \ + } \ + } \ + } \ + return ret; \ + } + +EVAL_FUNC_ARRAY_CONTAINS_VECTOR(int64_t, get_int) +EVAL_FUNC_ARRAY_CONTAINS_VECTOR(float, get_float) +EVAL_FUNC_ARRAY_CONTAINS_VECTOR(double, get_double) 
+EVAL_FUNC_ARRAY_CONTAINS_VECTOR(ObString, get_string) + +int ObExprArrayContains::eval_array_contains_array_vector(const ObExpr &expr, ObEvalCtx &ctx, + const ObBitVector &skip, const EvalBound &bound) +{ + int ret = OB_SUCCESS; + uint32_t p0 = expr.extra_ == 1 ? 1 : 0; + uint32_t p1 = expr.extra_ == 1 ? 0 : 1; + if (OB_FAIL(expr.args_[p0]->eval_vector(ctx, skip, bound)) || OB_FAIL(expr.args_[p1]->eval_vector(ctx, skip, bound))) { + LOG_WARN("fail to eval params", K(ret)); + } else { + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); + ObIVector *left_vec = expr.args_[p0]->get_vector(ctx); + VectorFormat left_format = left_vec->get_format(); + ObIVector *right_vec = expr.args_[p1]->get_vector(ctx); + VectorFormat right_format = right_vec->get_format(); + const uint16_t left_meta_id = expr.args_[p0]->obj_meta_.get_subschema_id(); + const uint16_t right_meta_id = expr.args_[p1]->obj_meta_.get_subschema_id(); + ObIVector *res_vec = expr.get_vector(ctx); + ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); + ObIArrayType *arr_obj = NULL; + ObIArrayType *arr_val = NULL; + for (int64_t idx = bound.start(); OB_SUCC(ret) && idx < bound.end(); ++idx) { + if (skip.at(idx) || eval_flags.at(idx)) { + continue; + } else if (left_format == VEC_UNIFORM || left_format == VEC_UNIFORM_CONST) { + ObString left = left_vec->get_string(idx); + if (OB_FAIL(ObNestedVectorFunc::construct_param(tmp_allocator, ctx, left_meta_id, left, arr_obj))) { + LOG_WARN("construct array obj failed", K(ret)); + } + } else if (OB_FAIL(ObNestedVectorFunc::construct_attr_param( + tmp_allocator, ctx, *expr.args_[p0], left_meta_id, idx, arr_obj))) { + LOG_WARN("construct array obj failed", K(ret)); + } + if (OB_FAIL(ret)) { + } else if (right_vec->is_null(idx)) { + bool contains_null = arr_obj->contain_null(); + res_vec->set_bool(idx, contains_null); + eval_flags.set(idx); + } else if (right_format == VEC_UNIFORM || right_format == 
VEC_UNIFORM_CONST) { + ObString right = right_vec->get_string(idx); + if (OB_FAIL(ObNestedVectorFunc::construct_param(tmp_allocator, ctx, right_meta_id, right, arr_val))) { + LOG_WARN("construct array obj failed", K(ret)); + } + } else if (OB_FAIL(ObNestedVectorFunc::construct_attr_param( + tmp_allocator, ctx, *expr.args_[p1], right_meta_id, idx, arr_val))) { + LOG_WARN("construct array obj failed", K(ret)); + } + bool bret = false; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObArrayUtil::contains(*arr_obj, *arr_val, bret))) { + LOG_WARN("array contains failed", K(ret)); + } else { + res_vec->set_bool(idx, bret); + eval_flags.set(idx); + } + } + } + + return ret; +} + +int ObExprArrayContains::cg_expr(ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const +{ + int ret = OB_SUCCESS; + UNUSED(raw_expr); + if (rt_expr.arg_cnt_ != 2 || OB_ISNULL(rt_expr.args_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("count of children is not 2 or children is null", K(ret), K(rt_expr.arg_cnt_), + K(rt_expr.args_)); + } else if (OB_ISNULL(rt_expr.args_[0]) || OB_ISNULL(rt_expr.args_[1])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("child is null", K(ret), K(rt_expr.args_[0]), K(rt_expr.args_[1])); + } else { + rt_expr.eval_func_ = NULL; + rt_expr.may_not_need_raw_check_ = false; + rt_expr.extra_ = raw_expr.get_extra(); + uint32_t p1 = rt_expr.extra_ == 1 ? 
0 : 1; + const ObObjType right_type = rt_expr.args_[p1]->datum_meta_.type_; + const ObObjTypeClass right_tc = ob_obj_type_class(right_type); + switch (right_tc) { + case ObIntTC: + rt_expr.eval_func_ = eval_array_contains_int64_t; + rt_expr.eval_batch_func_ = eval_array_contains_batch_int64_t; + rt_expr.eval_vector_func_ = eval_array_contains_vector_int64_t; + break; + case ObFloatTC: + rt_expr.eval_func_ = eval_array_contains_float; + rt_expr.eval_batch_func_ = eval_array_contains_batch_float; + rt_expr.eval_vector_func_ = eval_array_contains_vector_float; + break; + case ObDoubleTC: + rt_expr.eval_func_ = eval_array_contains_double; + rt_expr.eval_batch_func_ = eval_array_contains_batch_double; + rt_expr.eval_vector_func_ = eval_array_contains_vector_double; + break; + case ObStringTC: + rt_expr.eval_func_ = eval_array_contains_ObString; + rt_expr.eval_batch_func_ = eval_array_contains_batch_ObString; + rt_expr.eval_vector_func_ = eval_array_contains_vector_ObString; + break; + case ObNullTC: + case ObCollectionSQLTC: + rt_expr.eval_func_ = eval_array_contains_array; + rt_expr.eval_batch_func_ = eval_array_contains_array_batch; + rt_expr.eval_vector_func_ = eval_array_contains_array_vector; + break; + default : + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("invalid type", K(ret), K(right_type)); + } + } + + return OB_SUCCESS; +} + +} // namespace sql +} // namespace oceanbase diff --git a/src/sql/engine/expr/ob_expr_array_contains.h b/src/sql/engine/expr/ob_expr_array_contains.h new file mode 100644 index 0000000000..5c237b909b --- /dev/null +++ b/src/sql/engine/expr/ob_expr_array_contains.h @@ -0,0 +1,62 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + * This file contains implementation for array_contains. + */ + +#ifndef OCEANBASE_SQL_OB_EXPR_ARRAY_CONTAINS +#define OCEANBASE_SQL_OB_EXPR_ARRAY_CONTAINS + +#include "sql/engine/expr/ob_expr_operator.h" +#include "lib/geo/ob_geo_utils.h" +#include "lib/udt/ob_array_type.h" + + +namespace oceanbase +{ +namespace sql +{ +class ObExprArrayContains : public ObFuncExprOperator +{ +public: + explicit ObExprArrayContains(common::ObIAllocator &alloc); + explicit ObExprArrayContains(common::ObIAllocator &alloc, ObExprOperatorType type, + const char *name, int32_t param_num, int32_t dimension); + virtual ~ObExprArrayContains(); + virtual int calc_result_type2(ObExprResType &type, + ObExprResType &type1, + ObExprResType &type2, + common::ObExprTypeCtx &type_ctx) const override; + static int eval_array_contains_int64_t(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); + static int eval_array_contains_float(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); + static int eval_array_contains_double(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); + static int eval_array_contains_ObString(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); + static int eval_array_contains_array(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res); + static int eval_array_contains_array_batch(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const int64_t batch_size); + static int eval_array_contains_batch_int64_t(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const int64_t batch_size); + static int eval_array_contains_batch_float(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const int64_t batch_size); + static 
int eval_array_contains_batch_double(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const int64_t batch_size); + static int eval_array_contains_batch_ObString(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const int64_t batch_size); + static int eval_array_contains_vector_int64_t(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const EvalBound &bound); + static int eval_array_contains_vector_float(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const EvalBound &bound); + static int eval_array_contains_vector_double(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const EvalBound &bound); + static int eval_array_contains_vector_ObString(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const EvalBound &bound); + static int eval_array_contains_array_vector(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const EvalBound &bound); + virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const override; +private: + + DISALLOW_COPY_AND_ASSIGN(ObExprArrayContains); +}; + +} // sql +} // oceanbase +#endif // OCEANBASE_SQL_OB_EXPR_ARRAY_CONTAINS \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_cast.cpp b/src/sql/engine/expr/ob_expr_cast.cpp index 9230ad5842..5fa146ab58 100644 --- a/src/sql/engine/expr/ob_expr_cast.cpp +++ b/src/sql/engine/expr/ob_expr_cast.cpp @@ -352,7 +352,7 @@ int ObExprCast::calc_result_type2(ObExprResType &type, type1.get_collation_level(), cs_level))) { LOG_WARN("failed to get collation level", K(ret)); - } else if (FALSE_IT(dst_type.set_collation_level(cs_level))) { + } else if (!dst_type.is_collection_sql_type() && FALSE_IT(dst_type.set_collation_level(cs_level))) { } else if (OB_FAIL(adjust_udt_cast_type(type1, dst_type, type_ctx))) { LOG_WARN("adjust udt cast sub type failed", K(ret)); } else if (OB_UNLIKELY(!cast_supported(type1.get_type(), type1.get_collation_type(), @@ -654,6 +654,11 @@ int 
ObExprCast::get_cast_type(const bool enable_decimal_int, || ob_is_user_defined_sql_type(obj_type) || ob_is_collection_sql_type(obj_type)) { dst_type.set_udt_id(param_type2.get_udt_id()); + if (ob_is_collection_sql_type(obj_type)) { + // recover subschema id + dst_type.set_collation_type(static_cast(parse_node.int16_values_[OB_NODE_CAST_COLL_IDX])); + dst_type.set_collation_level(static_cast(parse_node.int16_values_[OB_NODE_CAST_CS_LEVEL_IDX])); + } } else if (lib::is_mysql_mode() && ob_is_json(obj_type)) { dst_type.set_collation_type(CS_TYPE_UTF8MB4_BIN); } else if (ob_is_geometry(obj_type)) { @@ -815,9 +820,15 @@ int ObExprCast::adjust_udt_cast_type(const ObExprResType &src_type, uint16_t subschema_id = ObMaxSystemUDTSqlType; if (!ObObjUDTUtil::ob_is_supported_sql_udt(dst_type.get_udt_id())) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("unsupported udt type for sql udt", K(ret), K(src_type), K(dst_type), - K(dst_type.get_udt_id()), K(src_type.get_udt_id())); + // maybe is array type, check subschema id validity + subschema_id = dst_type.get_subschema_id(); + ObSubSchemaValue sub_meta; + if (OB_ISNULL(exec_ctx)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("need ctx to get subschema mapping", K(ret), K(src_type), K(dst_type), KP(session)); + } else if (OB_FAIL(exec_ctx->get_sqludt_meta_by_subschema_id(subschema_id, sub_meta))) { + LOG_WARN("failed to get udt meta", K(ret), K(subschema_id)); + } } else if (udt_type_id == T_OBJ_XML) { subschema_id = 0; } else if (OB_ISNULL(exec_ctx)) { diff --git a/src/sql/engine/expr/ob_expr_cmp_func.cpp b/src/sql/engine/expr/ob_expr_cmp_func.cpp index c7ddb5b786..a8ac387b6e 100644 --- a/src/sql/engine/expr/ob_expr_cmp_func.cpp +++ b/src/sql/engine/expr/ob_expr_cmp_func.cpp @@ -18,6 +18,9 @@ #include "sql/engine/expr/ob_expr_operator.h" #include "sql/engine/expr/ob_batch_eval_util.h" #include "share/ob_lob_access_utils.h" +#include "lib/udt/ob_array_type.h" +#include "sql/engine/ob_subschema_ctx.h" +#include 
"sql/engine/expr/ob_array_expr_utils.h" namespace oceanbase { @@ -45,8 +48,8 @@ template <> constexpr int get_cmp_ret (const int ret) { return ret > 0; } template <> constexpr int get_cmp_ret (const int ret) { return ret != 0; } template <> constexpr int get_cmp_ret (const int ret) { return ret; } -template -int def_relational_eval_func(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum) +template +int def_relational_eval_func(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum, Args &...args) { int ret = OB_SUCCESS; ObDatum *l = NULL; @@ -60,7 +63,7 @@ int def_relational_eval_func(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_d ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid operands", K(ret), K(l), K(r)); } else { - ret = DatumFunc()(expr_datum, *l, *r); + ret = DatumFunc()(expr_datum, *l, *r, args...); } } return ret; @@ -368,6 +371,54 @@ struct ObRelationalGeoFunc } }; +// cmp for collection +template +struct ObRelationalCollectionFunc{}; + +template +struct ObRelationalCollectionFunc : ObDummyRelationalFunc {}; + +template +struct ObRelationalCollectionFunc +{ + struct DatumCmp + { + int operator()(ObDatum &res, const ObDatum &l, const ObDatum &r, const ObExpr &expr, ObEvalCtx &ctx) const + { + int ret = OB_SUCCESS; + int cmp_ret = 0; + ObString left = l.get_string(); + ObString right = r.get_string(); + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); + const uint16_t left_meta_id = expr.args_[0]->obj_meta_.get_subschema_id(); + const uint16_t right_meta_id = expr.args_[1]->obj_meta_.get_subschema_id(); + ObIArrayType *left_obj = NULL; + ObIArrayType *right_obj = NULL; + if (OB_FAIL(ObNestedArithOpBaseFunc::construct_param(tmp_allocator, ctx, left_meta_id, left, left_obj))) { + LOG_WARN("construct left param failed", K(ret), K(left_meta_id)); + } else if (OB_FAIL(ObNestedArithOpBaseFunc::construct_param(tmp_allocator, ctx, right_meta_id, right, right_obj))) { + LOG_WARN("construct 
left param failed", K(ret), K(left_meta_id)); + } else if (OB_FAIL(left_obj->compare(*right_obj, cmp_ret))) { + LOG_WARN("array do compare failed", K(ret), K(left_meta_id), K(right_meta_id)); + } else { + res.set_int(get_cmp_ret(cmp_ret)); + } + return ret; + } + }; + + inline static int eval(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum) + { + return def_relational_eval_func(expr, ctx, expr_datum, expr, ctx); + } + + inline static int eval_batch(BATCH_EVAL_FUNC_ARG_DECL) + { + return def_relational_eval_batch_func(BATCH_EVAL_FUNC_ARG_LIST, expr, ctx); + } +}; + // define null, extend, string evaluate batch functions. template struct ObRelationalExtraFunc @@ -621,6 +672,84 @@ struct ObRelationalExtraFunc } }; +template +struct ObRelationalVecFunc +{ + struct DatumCmp + { + int operator()(ObDatum &res, const ObDatum &l, const ObDatum &r, const ObExpr &expr, ObEvalCtx &ctx) const + { + int ret = OB_SUCCESS; + int cmp_ret = 0; + const ObExpr &left_expr = *expr.args_[0]; + const ObExpr &right_expr = *expr.args_[1]; + ObIArrayType *arr_l = NULL; + ObIArrayType *arr_r = NULL; + ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx); + common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator(); + if (OB_FAIL(ObArrayExprUtils::get_type_vector(left_expr, l, ctx, tmp_allocator, arr_l))) { + LOG_WARN("failed to get vector", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::get_type_vector(right_expr, r, ctx, tmp_allocator, arr_r))) { + LOG_WARN("failed to get vector", K(ret)); + } else if (OB_ISNULL(arr_l) || OB_ISNULL(arr_r)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret), K(arr_l), K(arr_r)); + } else if (OB_UNLIKELY(arr_l->size() != arr_r->size())) { + ret = OB_ERR_INVALID_VECTOR_DIM; + LOG_WARN("check array validty failed", K(ret), K(arr_l->size()), K(arr_r->size())); + } else if (arr_l->contain_null() || arr_r->contain_null()) { + ret = OB_ERR_NULL_VALUE; + LOG_WARN("array with null can't cmp", K(ret)); + } else { + const float *data_l = 
reinterpret_cast(arr_l->get_data()); + const float *data_r = reinterpret_cast(arr_r->get_data()); + const uint32_t size = arr_l->size(); + for (int64_t i = 0; i < size && cmp_ret == 0; ++i) { + if (isnan(data_l[i]) || isnan(data_r[i])) { + if (isnan(data_l[i]) && isnan(data_r[i])) { + cmp_ret = 0; + } else if (isnan(data_l[i])) { + // l is nan, r is not nan:left always bigger than right + cmp_ret = 1; + } else { + // l is not nan, r is nan, left always less than right + cmp_ret = -1; + } + } else { + cmp_ret = data_l[i] == data_r[i] ? 0 : (data_l[i] < data_r[i] ? -1 : 1); + } + } + res.set_int(get_cmp_ret(cmp_ret)); + } + return ret; + } + }; + + inline static int eval(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum) + { + int ret = OB_SUCCESS; + ObDatum *l = NULL; + ObDatum *r = NULL; + bool contain_null = false; + if (OB_FAIL(ObRelationalExprOperator::get_comparator_operands( + expr, ctx, l, r, expr_datum, contain_null))) { + LOG_WARN("failed to eval args", K(ret)); + } else if (!contain_null) { + if (OB_ISNULL(l) || OB_ISNULL(r)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid operands", K(ret), K(l), K(r)); + } else { + ret = DatumCmp()(expr_datum, *l, *r, expr, ctx); + } + } + return ret; + } + inline static int eval_batch(BATCH_EVAL_FUNC_ARG_DECL) + { + return def_relational_eval_batch_func(BATCH_EVAL_FUNC_ARG_LIST, expr, ctx); + } +}; + static ObExpr::EvalBatchFunc EVAL_BATCH_NULL_EXTEND_CMP_FUNCS[CO_MAX]; static ObExpr::EvalBatchFunc EVAL_BATCH_STR_CMP_FUNCS[CO_MAX]; @@ -629,6 +758,7 @@ static ObExpr::EvalBatchFunc EVAL_BATCH_TEXT_STR_CMP_FUNCS[CO_MAX]; static ObExpr::EvalBatchFunc EVAL_BATCH_STR_TEXT_CMP_FUNCS[CO_MAX]; static ObExpr::EvalBatchFunc EVAL_BATCH_JSON_CMP_FUNCS[CO_MAX]; static ObExpr::EvalBatchFunc EVAL_BATCH_GEO_CMP_FUNCS[CO_MAX]; +static ObExpr::EvalBatchFunc EVAL_BATCH_COLLECTION_CMP_FUNCS[CO_MAX]; static ObExpr::EvalFunc EVAL_TYPE_CMP_FUNCS[ObMaxType][ObMaxType][CO_MAX]; static ObExpr::EvalBatchFunc 
EVAL_BATCH_TYPE_CMP_FUNCS[ObMaxType][ObMaxType][CO_MAX]; @@ -651,6 +781,8 @@ static ObExpr::EvalFunc EVAL_JSON_CMP_FUNCS[CO_MAX][2]; static ObDatumCmpFuncType DATUM_JSON_CMP_FUNCS[2]; static ObExpr::EvalFunc EVAL_GEO_CMP_FUNCS[CO_MAX][2]; static ObDatumCmpFuncType DATUM_GEO_CMP_FUNCS[2]; +static ObExpr::EvalFunc EVAL_COLLECTION_CMP_FUNCS[CO_MAX][2]; +static ObDatumCmpFuncType DATUM_COLLECTION_CMP_FUNCS[2]; static ObExpr::EvalFunc EVAL_FIXED_DOUBLE_CMP_FUNCS[OB_NOT_FIXED_SCALE][CO_MAX]; static ObExpr::EvalBatchFunc EVAL_BATCH_FIXED_DOUBLE_CMP_FUNCS[OB_NOT_FIXED_SCALE][CO_MAX]; @@ -661,6 +793,8 @@ static ObExpr::EvalBatchFunc EVAL_BATCH_DECINT_CMP_FUNCS[DECIMAL_INT_MAX][DECIMA static ObDatumCmpFuncType DATUM_DECINT_CMP_FUNCS[DECIMAL_INT_MAX][DECIMAL_INT_MAX]; +static ObExpr::EvalFunc EVAL_VEC_CMP_FUNCS[CO_MAX]; +static ObExpr::EvalBatchFunc EVAL_BATCH_VEC_CMP_FUNCS[CO_MAX]; template struct ExtraExprCmpIniter { @@ -932,6 +1066,34 @@ struct DatumGeoExprCmpIniter } }; +template +struct CollectionExprFuncIniter +{ + template + using EvalCmp = ObRelationalCollectionFunc(Y)>; + static void init_array() + { + EVAL_COLLECTION_CMP_FUNCS[Y][0] = EvalCmp<0>::eval; + EVAL_COLLECTION_CMP_FUNCS[Y][1] = EvalCmp<1>::eval; + EVAL_BATCH_COLLECTION_CMP_FUNCS[Y] = EvalCmp<1>::eval_batch; + } +}; + +template +struct DatumCollectionExprCmpIniter +{ + template + using DatumCmp = datum_cmp::ObDatumCollectionCmp; + using Def = datum_cmp::ObDatumCollectionCmp; + static void init_array() + { + DATUM_COLLECTION_CMP_FUNCS[0] = Def::defined_ ? DatumCmp<0>::cmp : NULL; + DATUM_COLLECTION_CMP_FUNCS[1] = Def::defined_ ? 
DatumCmp<1>::cmp : NULL; + } +}; + int g_init_type_ret = Ob2DArrayConstIniter::init(); int g_init_tc_ret = Ob2DArrayConstIniter::init(); int g_init_str_ret = Ob2DArrayConstIniter::init(); @@ -947,6 +1109,9 @@ int g_init_json_datum_ret = ObArrayConstIniter<1, DatumJsonExprCmpIniter>::init( int g_init_geo_ret = ObArrayConstIniter::init(); int g_init_geo_datum_ret = ObArrayConstIniter<1, DatumGeoExprCmpIniter>::init(); +int g_init_collection_ret = ObArrayConstIniter::init(); +int g_init_collection_datum_ret = ObArrayConstIniter<1, DatumCollectionExprCmpIniter>::init(); + template struct FixedDoubleCmpFuncIniter { @@ -1072,6 +1237,8 @@ ObExpr::EvalFunc ObExprCmpFuncsHelper::get_eval_expr_cmp_func(const ObObjType ty func_ptr = EVAL_JSON_CMP_FUNCS[cmp_op][has_lob_header]; } else if (tc1 == ObGeometryTC && tc2 == ObGeometryTC) { func_ptr = EVAL_GEO_CMP_FUNCS[cmp_op][has_lob_header]; + } else if (tc1 == ObCollectionSQLTC && tc2 == ObCollectionSQLTC) { + func_ptr = EVAL_COLLECTION_CMP_FUNCS[cmp_op][has_lob_header]; } else if (IS_FIXED_DOUBLE) { func_ptr = EVAL_FIXED_DOUBLE_CMP_FUNCS[MAX(scale1, scale2)][cmp_op]; } else if (tc1 == ObDecimalIntTC && tc2 == ObDecimalIntTC) { @@ -1080,6 +1247,8 @@ ObExpr::EvalFunc ObExprCmpFuncsHelper::get_eval_expr_cmp_func(const ObObjType ty OB_ASSERT(lw < DECIMAL_INT_MAX && lw >= 0); OB_ASSERT(rw < DECIMAL_INT_MAX && rw >= 0); func_ptr = EVAL_DECINT_CMP_FUNCS[lw][rw][cmp_op]; + } else if (tc1 == ObCollectionSQLTC && tc2 == ObCollectionSQLTC) { + func_ptr = EVAL_VEC_CMP_FUNCS[cmp_op]; } else if (tc1 == ObUserDefinedSQLTC || tc2 == ObUserDefinedSQLTC) { func_ptr = NULL; //? 
} else if (!ObDatumFuncs::is_string_type(type1) || !ObDatumFuncs::is_string_type(type2)) { @@ -1134,6 +1303,8 @@ ObExpr::EvalBatchFunc ObExprCmpFuncsHelper::get_eval_batch_expr_cmp_func( if (NULL != EVAL_GEO_CMP_FUNCS[cmp_op][has_lob_header]) { func_ptr = EVAL_BATCH_GEO_CMP_FUNCS[cmp_op]; } + } else if (tc1 == ObCollectionSQLTC && tc2 == ObCollectionSQLTC) { + func_ptr = EVAL_BATCH_COLLECTION_CMP_FUNCS[cmp_op]; } else if (IS_FIXED_DOUBLE) { func_ptr = EVAL_BATCH_FIXED_DOUBLE_CMP_FUNCS[MAX(scale1, scale2)][cmp_op]; } else if (ob_is_decimal_int(type1) && ob_is_decimal_int(type2)) { @@ -1193,6 +1364,8 @@ DatumCmpFunc ObExprCmpFuncsHelper::get_datum_expr_cmp_func(const ObObjType type1 func_ptr = DATUM_JSON_CMP_FUNCS[has_lob_header]; } else if (type1 == ObGeometryType && type2 == ObGeometryType) { func_ptr = DATUM_GEO_CMP_FUNCS[has_lob_header]; + } else if (type1 == ObCollectionSQLType && type2 == ObCollectionSQLType) { + func_ptr = DATUM_COLLECTION_CMP_FUNCS[has_lob_header]; } else if (IS_FIXED_DOUBLE) { func_ptr = DATUM_FIXED_DOUBLE_CMP_FUNCS[MAX(scale1, scale2)]; } else if (ob_is_decimal_int(type1) && ob_is_decimal_int(type2)) { @@ -1375,6 +1548,25 @@ REG_SER_FUNC_ARRAY(OB_SFA_DATUM_CMP_GEO, DATUM_GEO_CMP_FUNCS, sizeof(DATUM_GEO_CMP_FUNCS) / sizeof(void *)); +// Collection cmp functions reg +static_assert(7 == CO_MAX && CO_MAX * 2 == sizeof(EVAL_COLLECTION_CMP_FUNCS) / sizeof(void *), + "unexpected size"); +REG_SER_FUNC_ARRAY(OB_SFA_RELATION_EXPR_COLLECTION_EVAL, + EVAL_COLLECTION_CMP_FUNCS, + sizeof(EVAL_COLLECTION_CMP_FUNCS) / sizeof(void *)); + +static_assert(7 == CO_MAX && CO_MAX == sizeof(EVAL_BATCH_COLLECTION_CMP_FUNCS) / sizeof(void *), + "unexpected size"); +REG_SER_FUNC_ARRAY(OB_SFA_RELATION_EXPR_COLLECTION_EVAL_BATCH, + EVAL_BATCH_COLLECTION_CMP_FUNCS, + sizeof(EVAL_BATCH_COLLECTION_CMP_FUNCS) / sizeof(void *)); + +static_assert(2 == sizeof(DATUM_COLLECTION_CMP_FUNCS) / sizeof(void *), + "unexpected size"); +REG_SER_FUNC_ARRAY(OB_SFA_DATUM_CMP_COLLECTION, + 
DATUM_COLLECTION_CMP_FUNCS, + sizeof(DATUM_COLLECTION_CMP_FUNCS) / sizeof(void *)); + // Fixed double cmp functions reg static_assert( OB_NOT_FIXED_SCALE * CO_MAX == sizeof(EVAL_FIXED_DOUBLE_CMP_FUNCS) / sizeof(void *), diff --git a/src/sql/engine/expr/ob_expr_column_conv.cpp b/src/sql/engine/expr/ob_expr_column_conv.cpp index 54c8a20de3..d0103bdbf3 100644 --- a/src/sql/engine/expr/ob_expr_column_conv.cpp +++ b/src/sql/engine/expr/ob_expr_column_conv.cpp @@ -256,6 +256,11 @@ int ObExprColumnConv::calc_result_typeN(ObExprResType &type, } else if (OB_ISNULL(exec_ctx)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("need context to search subschema mapping", K(ret), K(udt_id)); + } else if (str_values_.count() > 0) { + // array type + if (OB_FAIL(exec_ctx->get_subschema_id_by_type_string(str_values_.at(0), subschema_id))) { + LOG_WARN("failed to get array type subschema id", K(ret)); + } } else if (OB_FAIL(exec_ctx->get_subschema_id_by_udt_id(udt_id, subschema_id))) { LOG_WARN("failed to get sub schema id", K(ret), K(udt_id)); } diff --git a/src/sql/engine/expr/ob_expr_div.cpp b/src/sql/engine/expr/ob_expr_div.cpp index c1fb9796ba..bfbdaf1fc7 100644 --- a/src/sql/engine/expr/ob_expr_div.cpp +++ b/src/sql/engine/expr/ob_expr_div.cpp @@ -19,6 +19,7 @@ #include "sql/engine/expr/ob_batch_eval_util.h" #include "share/object/ob_obj_cast_util.h" #include "sql/resolver/expr/ob_raw_expr_util.h" +#include "sql/engine/expr/ob_array_expr_utils.h" namespace oceanbase @@ -148,6 +149,14 @@ int ObExprDiv::calc_result_type2(ObExprResType &type, } else if (ObIntervalTC == type.get_type_class()) { type.set_scale(ObAccuracy::MAX_ACCURACY2[ORACLE_MODE][type.get_type()].get_scale()); type.set_precision(ObAccuracy::MAX_ACCURACY2[ORACLE_MODE][type.get_type()].get_precision()); + } else if (ObCollectionSQLTC == result_tc) { + // only support vector / int now + if (OB_FAIL(ObArrayExprUtils::calc_cast_type(type1, type_ctx, true/*only_vector*/))) { // here only to avoid type1 cast + LOG_WARN("failed to 
calc cast type", K(ret), K(type1)); + } else { + type.set_collection(type1.get_subschema_id()); + type2.set_calc_type(ObFloatType); + } } type.unset_result_flag(NOT_NULL_FLAG); // divided by zero } @@ -683,6 +692,17 @@ int ObExprDiv::div_double_vector(VECTOR_EVAL_FUNC_ARG_DECL) VECTOR_EVAL_FUNC_ARG_LIST, expr, is_oracle); } +int ObExprDiv::div_vec(EVAL_FUNC_ARG_DECL) +{ + ObVectorArithFunc::ArithType op_type = ObVectorArithFunc::ArithType::DIV; + return def_arith_eval_func(EVAL_FUNC_ARG_LIST, expr, ctx, op_type); +} +int ObExprDiv::div_vec_batch(BATCH_EVAL_FUNC_ARG_DECL) +{ + ObVectorArithFunc::ArithType op_type = ObVectorArithFunc::ArithType::DIV; + return def_batch_arith_op_by_datum_func(BATCH_EVAL_FUNC_ARG_LIST, expr, ctx, op_type); +} + struct ObNumberDivFunc { int operator()(ObDatum &res, const ObDatum &l, const ObDatum &r, const ObExpr &expr, @@ -1254,6 +1274,10 @@ int ObExprDiv::cg_expr(ObExprCGCtx &op_cg_ctx, set_decimalint_div_func_ptr(rt_expr); break; } + case ObCollectionSQLType: { + SET_DIV_FUNC_PTR(div_vec); + break; + } default: { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected result type", K(ret), K(rt_expr.datum_meta_.type_)); diff --git a/src/sql/engine/expr/ob_expr_div.h b/src/sql/engine/expr/ob_expr_div.h index 8a59242822..a964964491 100644 --- a/src/sql/engine/expr/ob_expr_div.h +++ b/src/sql/engine/expr/ob_expr_div.h @@ -58,6 +58,8 @@ public: static int div_intervalym_number_batch(BATCH_EVAL_FUNC_ARG_DECL); static int div_intervalds_number(EVAL_FUNC_ARG_DECL); static int div_intervalds_number_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int div_vec(EVAL_FUNC_ARG_DECL); + static int div_vec_batch(BATCH_EVAL_FUNC_ARG_DECL); #define DECINC_DIV_EVAL_FUNC_BASIC(L, R) \ static int div_decimalint_##L##_##R(EVAL_FUNC_ARG_DECL); \ diff --git a/src/sql/engine/expr/ob_expr_div_result_type.map b/src/sql/engine/expr/ob_expr_div_result_type.map index e184765e3d..1632b066ae 100644 --- a/src/sql/engine/expr/ob_expr_div_result_type.map +++ 
b/src/sql/engine/expr/ob_expr_div_result_type.map @@ -2991,13 +2991,13 @@ static constexpr ObObjType DIV_RESULT_TYPE[ObMaxType][ObMaxType] = ObMaxType, /* SmallIntType */ ObMaxType, /* MediumIntType */ ObMaxType, /* Int32Type */ - ObMaxType, /* IntType */ + ObCollectionSQLType, /* IntType */ ObMaxType, /* UTinyIntType */ ObMaxType, /* USmallIntType */ ObMaxType, /* UMediumIntType */ ObMaxType, /* UInt32Type */ ObMaxType, /* UInt64Type */ - ObMaxType, /* FloatType */ + ObCollectionSQLType, /* FloatType */ ObMaxType, /* DoubleType */ ObMaxType, /* UFloatType */ ObMaxType, /* UDoubleType */ diff --git a/src/sql/engine/expr/ob_expr_eval_functions.cpp b/src/sql/engine/expr/ob_expr_eval_functions.cpp index dc4177b5a5..4a04dd1ee7 100644 --- a/src/sql/engine/expr/ob_expr_eval_functions.cpp +++ b/src/sql/engine/expr/ob_expr_eval_functions.cpp @@ -375,6 +375,14 @@ #include "ob_expr_st_symdifference.h" #include "ob_expr_priv_st_asmvtgeom.h" #include "ob_expr_priv_st_makevalid.h" +#include "ob_expr_array.h" +#include "ob_expr_vec_vector.h" +#include "ob_expr_vec_key.h" +#include "ob_expr_vec_scn.h" +#include "ob_expr_vec_vid.h" +#include "ob_expr_vec_data.h" +#include "ob_expr_vec_type.h" +#include "ob_expr_vector.h" #include "ob_expr_func_ceil.h" #include "ob_expr_topn_filter.h" #include "ob_expr_sdo_relate.h" @@ -390,6 +398,7 @@ #include "ob_expr_rb_calc.h" #include "ob_expr_rb_to_string.h" #include "ob_expr_rb_from_string.h" +#include "ob_expr_array_contains.h" #include "ob_expr_audit_log_func.h" #include "ob_expr_can_access_trigger.h" #include "ob_expr_split_part.h" @@ -1214,14 +1223,14 @@ static ObExpr::EvalFunc g_expr_eval_functions[] = { ObExprRbToString::eval_rb_to_string, /* 717 */ ObExprRbFromString::eval_rb_from_string, /* 718 */ NULL, // ObExprRbIterate::eval_rb_iterate, /* 719 */ - NULL, // ObExprArray::eval_array, /* 720 */ - NULL, // ObExprVectorL1Distance::calc_l1_distance, /* 721 */ - NULL, // ObExprVectorL2Distance::calc_l2_distance, /* 722 */ - NULL, // 
ObExprVectorCosineDistance::calc_cosine_distance, /* 723 */ - NULL, // ObExprVectorIPDistance::calc_inner_product, /* 724 */ - NULL, // ObExprVectorDims::calc_dims, /* 725 */ - NULL, // ObExprVectorNorm::calc_norm, /* 726 */ - NULL, // ObExprVectorDistance::calc_distance, /* 727 */ + ObExprArray::eval_array, /* 720 */ + ObExprVectorL1Distance::calc_l1_distance, /* 721 */ + ObExprVectorL2Distance::calc_l2_distance, /* 722 */ + ObExprVectorCosineDistance::calc_cosine_distance, /* 723 */ + ObExprVectorIPDistance::calc_inner_product, /* 724 */ + ObExprVectorDims::calc_dims, /* 725 */ + ObExprVectorNorm::calc_norm, /* 726 */ + ObExprVectorDistance::calc_distance, /* 727 */ NULL, // ObExprInnerDoubleToInt::eval_inner_double_to_int /* 728 */ NULL, // ObExprInnerDecimalToYear::eval_inner_decimal_to_year /* 729 */ ObExprSm3::eval_sm3, /* 730 */ @@ -1229,14 +1238,14 @@ static ObExpr::EvalFunc g_expr_eval_functions[] = { ObExprSm4Decrypt::eval_sm4_decrypt, /* 732 */ NULL, // ObExprAdd::add_vec_vec, /* 733 */ NULL, // ObExprMinus::minus_vec_vec, /* 734 */ - NULL, // ObExprMul::mul_vec_vec, /* 735 */ - NULL, // ObExprDiv::div_vec, /* 736 */ - NULL, // ObExprVecKey::generate_vec_key, /* 737 */ - NULL, // ObExprVecScn::generate_vec_scn, /* 738 */ - NULL, // ObExprVecVid::generate_vec_id, /* 739 */ - NULL, // ObExprVecData::generate_vec_data, /* 740 */ - NULL, // ObExprVecType::generate_vec_type, /* 741 */ - NULL, // ObExprVecVector::generate_vec_vector, /* 742 */ + ObExprMul::mul_vec_vec, /* 735 */ + ObExprDiv::div_vec, /* 736 */ + ObExprVecKey::generate_vec_key, /* 737 */ + ObExprVecScn::generate_vec_scn, /* 738 */ + ObExprVecVid::generate_vec_id, /* 739 */ + ObExprVecData::generate_vec_data, /* 740 */ + ObExprVecType::generate_vec_type, /* 741 */ + ObExprVecVector::generate_vec_vector, /* 742 */ ObExprRegexp::eval_hs_regexp, /* 743 */ ObExprRegexpCount::eval_hs_regexp_count, /* 744 */ ObExprRegexpInstr::eval_hs_regexp_instr, /* 745 */ @@ -1244,11 +1253,11 @@ static 
ObExpr::EvalFunc g_expr_eval_functions[] = { ObExprRegexpReplace::eval_hs_regexp_replace, /* 747 */ ObExprRegexpSubstr::eval_hs_regexp_substr, /* 748 */ ObExprColumnConv::column_convert_fast, /* 749 */ - NULL, //ObExprArrayContains::eval_array_contains_int64_t, /* 750 */ - NULL, //ObExprArrayContains::eval_array_contains_float, /* 751 */ - NULL, //ObExprArrayContains::eval_array_contains_double, /* 752 */ - NULL, //ObExprArrayContains::eval_array_contains_ObString, /* 753 */ - NULL, //ObExprArrayContains::eval_array_contains_array, /* 754 */ + ObExprArrayContains::eval_array_contains_int64_t, /* 750 */ + ObExprArrayContains::eval_array_contains_float, /* 751 */ + ObExprArrayContains::eval_array_contains_double, /* 752 */ + ObExprArrayContains::eval_array_contains_ObString, /* 753 */ + ObExprArrayContains::eval_array_contains_array, /* 754 */ ObExprSplitPart::calc_split_part_expr, /* 755 */ }; @@ -1386,17 +1395,17 @@ static ObExpr::EvalBatchFunc g_expr_eval_batch_functions[] = { ObExprTopNFilter::eval_topn_filter_batch, /* 130 */ NULL,//ObRelationalExprOperator::eval_batch_min_max_compare, /* 131 */ NULL,//ObExprBM25::eval_batch_bm25_relevance_expr, /* 132 */ - NULL, // ObExprAdd::add_vec_vec_batch, /* 133 */ - NULL, // ObExprMinus::minus_vec_vec_batch, /* 134 */ - NULL, // ObExprMul::mul_vec_vec_batch, /* 135 */ - NULL, // ObExprDiv::div_vec_batch, /* 136 */ + NULL,// ObExprAdd::add_vec_vec_batch, /* 133 */ + NULL,// ObExprMinus::minus_vec_vec_batch, /* 134 */ + ObExprMul::mul_vec_vec_batch, /* 135 */ + ObExprDiv::div_vec_batch, /* 136 */ ObExprColumnConv::column_convert_batch, /* 137 */ ObExprColumnConv::column_convert_batch_fast, /* 138 */ - NULL, // ObExprArrayContains::eval_array_contains_batch_int64_t, /* 139 */ - NULL, // ObExprArrayContains::eval_array_contains_batch_float, /* 140 */ - NULL, // ObExprArrayContains::eval_array_contains_batch_double, /* 141 */ - NULL, // ObExprArrayContains::eval_array_contains_batch_ObString, /* 142 */ - NULL, // 
ObExprArrayContains::eval_array_contains_array_batch, /* 143 */ + ObExprArrayContains::eval_array_contains_batch_int64_t, /* 139 */ + ObExprArrayContains::eval_array_contains_batch_float, /* 140 */ + ObExprArrayContains::eval_array_contains_batch_double, /* 141 */ + ObExprArrayContains::eval_array_contains_batch_ObString, /* 142 */ + ObExprArrayContains::eval_array_contains_array_batch, /* 143 */ }; static ObExpr::EvalVectorFunc g_expr_eval_vector_functions[] = { @@ -1517,12 +1526,12 @@ static ObExpr::EvalVectorFunc g_expr_eval_vector_functions[] = { ObExprCeilFloor::calc_ceil_floor_vector, /* 114 */ ObExprRepeat::eval_repeat_vector, /* 115 */ ObExprRegexpReplace::eval_hs_regexp_replace_vector, /* 116 */ - NULL, // ObExprArrayContains::eval_array_contains_vector_int64_t, /* 117 */ - NULL, // ObExprArrayContains::eval_array_contains_vector_float, /* 118 */ - NULL, // ObExprArrayContains::eval_array_contains_vector_double, /* 119 */ - NULL, // ObExprArrayContains::eval_array_contains_vector_ObString, /* 120 */ - NULL, // ObExprArrayContains::eval_array_contains_array_vector, /* 121 */ - ObExprCalcPartitionBase::fast_calc_partition_level_one_vector, /* 122 */ + ObExprArrayContains::eval_array_contains_vector_int64_t, /* 117 */ + ObExprArrayContains::eval_array_contains_vector_float, /* 118 */ + ObExprArrayContains::eval_array_contains_vector_double, /* 119 */ + ObExprArrayContains::eval_array_contains_vector_ObString, /* 120 */ + ObExprArrayContains::eval_array_contains_array_vector, /* 121 */ + ObExprCalcPartitionBase::fast_calc_partition_level_one_vector,/* 122 */ NULL, // ObExprTrim::eval_trim_vector /* 123 */ }; @@ -1734,54 +1743,54 @@ REG_SER_FUNC_ARRAY(OB_SFA_DECIMAL_INT_EXPR_EVAL_BATCH, ARRAYSIZEOF(g_decimal_int_eval_batch_functions)); static ObExpr::EvalFunc g_collection_eval_functions[] = { - NULL, // ObExprAdd::add_collection_collection_int8_t, - NULL, // ObExprAdd::add_collection_collection_int16_t, - NULL, // ObExprAdd::add_collection_collection_int32_t, - 
NULL, // ObExprAdd::add_collection_collection_int64_t, - NULL, // ObExprAdd::add_collection_collection_float, - NULL, // ObExprAdd::add_collection_collection_double, - NULL, // ObExprMinus::minus_collection_collection_int8_t, - NULL, // ObExprMinus::minus_collection_collection_int16_t, - NULL, // ObExprMinus::minus_collection_collection_int32_t, - NULL, // ObExprMinus::minus_collection_collection_int64_t, - NULL, // ObExprMinus::minus_collection_collection_float, - NULL, // ObExprMinus::minus_collection_collection_double, - NULL, // ObExprAdd::add_collection_collection_uint64_t, - NULL // ObExprMinus::minus_collection_collection_uint64_t, + ObExprAdd::add_collection_collection_int8_t, + ObExprAdd::add_collection_collection_int16_t, + ObExprAdd::add_collection_collection_int32_t, + ObExprAdd::add_collection_collection_int64_t, + ObExprAdd::add_collection_collection_float, + ObExprAdd::add_collection_collection_double, + ObExprMinus::minus_collection_collection_int8_t, + ObExprMinus::minus_collection_collection_int16_t, + ObExprMinus::minus_collection_collection_int32_t, + ObExprMinus::minus_collection_collection_int64_t, + ObExprMinus::minus_collection_collection_float, + ObExprMinus::minus_collection_collection_double, + ObExprAdd::add_collection_collection_uint64_t, + ObExprMinus::minus_collection_collection_uint64_t, }; static ObExpr::EvalBatchFunc g_collection_eval_batch_functions[] = { - NULL, // ObExprAdd::add_collection_collection_int8_t_batch, - NULL, // ObExprAdd::add_collection_collection_int16_t_batch, - NULL, // ObExprAdd::add_collection_collection_int32_t_batch, - NULL, // ObExprAdd::add_collection_collection_int64_t_batch, - NULL, // ObExprAdd::add_collection_collection_float_batch, - NULL, // ObExprAdd::add_collection_collection_double_batch, - NULL, // ObExprMinus::minus_collection_collection_int8_t_batch, - NULL, // ObExprMinus::minus_collection_collection_int16_t_batch, - NULL, // ObExprMinus::minus_collection_collection_int32_t_batch, - NULL, // 
ObExprMinus::minus_collection_collection_int64_t_batch, - NULL, // ObExprMinus::minus_collection_collection_float_batch, - NULL, // ObExprMinus::minus_collection_collection_double_batch, - NULL, // ObExprAdd::add_collection_collection_uint64_t_batch, - NULL // ObExprMinus::minus_collection_collection_uint64_t_batch, + ObExprAdd::add_collection_collection_int8_t_batch, + ObExprAdd::add_collection_collection_int16_t_batch, + ObExprAdd::add_collection_collection_int32_t_batch, + ObExprAdd::add_collection_collection_int64_t_batch, + ObExprAdd::add_collection_collection_float_batch, + ObExprAdd::add_collection_collection_double_batch, + ObExprMinus::minus_collection_collection_int8_t_batch, + ObExprMinus::minus_collection_collection_int16_t_batch, + ObExprMinus::minus_collection_collection_int32_t_batch, + ObExprMinus::minus_collection_collection_int64_t_batch, + ObExprMinus::minus_collection_collection_float_batch, + ObExprMinus::minus_collection_collection_double_batch, + ObExprAdd::add_collection_collection_uint64_t_batch, + ObExprMinus::minus_collection_collection_uint64_t_batch, }; static ObExpr::EvalVectorFunc g_collection_expr_eval_vector_functions[] = { - NULL, // ObExprAdd::add_collection_collection_int8_t_vector, - NULL, // ObExprAdd::add_collection_collection_int16_t_vector, - NULL, // ObExprAdd::add_collection_collection_int32_t_vector, - NULL, // ObExprAdd::add_collection_collection_int64_t_vector, - NULL, // ObExprAdd::add_collection_collection_float_vector, - NULL, // ObExprAdd::add_collection_collection_double_vector, - NULL, // ObExprMinus::minus_collection_collection_int8_t_vector, - NULL, // ObExprMinus::minus_collection_collection_int16_t_vector, - NULL, // ObExprMinus::minus_collection_collection_int32_t_vector, - NULL, // ObExprMinus::minus_collection_collection_int64_t_vector, - NULL, // ObExprMinus::minus_collection_collection_float_vector, - NULL, // ObExprMinus::minus_collection_collection_double_vector, - NULL, // 
ObExprAdd::add_collection_collection_uint64_t_vector, - NULL // ObExprMinus::minus_collection_collection_uint64_t_vector, + ObExprAdd::add_collection_collection_int8_t_vector, + ObExprAdd::add_collection_collection_int16_t_vector, + ObExprAdd::add_collection_collection_int32_t_vector, + ObExprAdd::add_collection_collection_int64_t_vector, + ObExprAdd::add_collection_collection_float_vector, + ObExprAdd::add_collection_collection_double_vector, + ObExprMinus::minus_collection_collection_int8_t_vector, + ObExprMinus::minus_collection_collection_int16_t_vector, + ObExprMinus::minus_collection_collection_int32_t_vector, + ObExprMinus::minus_collection_collection_int64_t_vector, + ObExprMinus::minus_collection_collection_float_vector, + ObExprMinus::minus_collection_collection_double_vector, + ObExprAdd::add_collection_collection_uint64_t_vector, + ObExprMinus::minus_collection_collection_uint64_t_vector, }; REG_SER_FUNC_ARRAY(OB_SFA_COLLECTION_EXPR_EVAL, diff --git a/src/sql/engine/expr/ob_expr_is.cpp b/src/sql/engine/expr/ob_expr_is.cpp index 172c0ebe00..a44b25286b 100644 --- a/src/sql/engine/expr/ob_expr_is.cpp +++ b/src/sql/engine/expr/ob_expr_is.cpp @@ -78,6 +78,9 @@ int ObExprIsBase::calc_result_type2(ObExprResType &type, } } else { type1.set_calc_type(type1.get_type()); + if (ob_is_collection_sql_type(type1.get_type())) { + type1.set_calc_subschema_id(type1.get_subschema_id()); // avoid invalid cast + } // query range extract check the calc type of %type. 
type.set_calc_meta(type1.get_calc_meta()); } diff --git a/src/sql/engine/expr/ob_expr_minus.cpp b/src/sql/engine/expr/ob_expr_minus.cpp index 334c8ca7d1..f35865e521 100644 --- a/src/sql/engine/expr/ob_expr_minus.cpp +++ b/src/sql/engine/expr/ob_expr_minus.cpp @@ -24,6 +24,7 @@ #include "sql/engine/expr/ob_rt_datum_arith.h" #include "sql/resolver/expr/ob_raw_expr_util.h" #include "sql/engine/expr/ob_expr_util.h" +#include "sql/engine/expr/ob_array_expr_utils.h" namespace oceanbase { @@ -68,6 +69,38 @@ int ObExprMinus::calc_result_type2(ObExprResType &type, } else if (type.is_decimal_int() && (type1.is_null() || type2.is_null())) { type.set_precision(MAX(type1.get_precision(), type2.get_precision())); type.set_scale(MAX(type1.get_scale(), type2.get_scale())); + } else if (type.is_collection_sql_type()) { + if (type1.is_collection_sql_type() && type2.is_collection_sql_type()) { + ObSQLSessionInfo *session = const_cast(type_ctx.get_session()); + ObExecContext *exec_ctx = OB_ISNULL(session) ? 
NULL : session->get_cur_exec_ctx(); + if (type1.get_subschema_id() != type2.get_subschema_id()) { + ObExprResType coll_calc_type = type; + if (OB_FAIL(ObExprResultTypeUtil::get_array_calc_type(exec_ctx, type1, type2, coll_calc_type))) { + LOG_WARN("failed to check array compatibilty", K(ret)); + } else { + type1.set_calc_meta(coll_calc_type); + type2.set_calc_meta(coll_calc_type); + type.set_collection(coll_calc_type.get_subschema_id()); + } + } else { + // subschem id in calc_meta is set to uint16_max in ObArithExprOperator::calc_result_type2 + // set real subschema id to calc_meta from meta + type1.set_calc_meta(type1); + type2.set_calc_meta(type2); + type.set_collection(type1.get_subschema_id()); + } + } else { + // only support vector/array/varchar - vector/array/varchar now // array and varchar need cast to array(float) + uint16_t res_subschema_id = UINT16_MAX; + if (OB_FAIL(ObArrayExprUtils::calc_cast_type2(type1, type2, type_ctx, res_subschema_id))) { + LOG_WARN("failed to calc cast type", K(ret), K(type1)); + } else if (UINT16_MAX == res_subschema_id) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected result subschema_id", K(ret)); + } else { + type.set_collection(res_subschema_id); + } + } } else if (OB_UNLIKELY(SCALE_UNKNOWN_YET == type1.get_scale() || SCALE_UNKNOWN_YET == type2.get_scale())) { type.set_scale(NUMBER_SCALE_UNKNOWN_YET); @@ -789,6 +822,41 @@ int ObExprMinus::cg_expr(ObExprCGCtx &op_cg_ctx, break; } break; + case ObCollectionSQLType: { + ObExecContext *exec_ctx = op_cg_ctx.session_->get_cur_exec_ctx(); + const uint16_t sub_id = rt_expr.obj_meta_.get_subschema_id(); + ObObjType elem_type; + uint32_t unused; + bool is_vec = false; + if (OB_FAIL(ObArrayExprUtils::get_array_element_type(exec_ctx, sub_id, elem_type, unused, is_vec))) { + LOG_WARN("failed to get collection elem type", K(ret), K(sub_id)); + } else if (elem_type == ObTinyIntType) { + SET_MINUS_FUNC_PTR(minus_collection_collection_int8_t); + rt_expr.eval_vector_func_ = 
minus_collection_collection_int8_t_vector; + } else if (elem_type == ObSmallIntType) { + SET_MINUS_FUNC_PTR(minus_collection_collection_int16_t); + rt_expr.eval_vector_func_ = minus_collection_collection_int16_t_vector; + } else if (elem_type == ObInt32Type) { + SET_MINUS_FUNC_PTR(minus_collection_collection_int32_t); + rt_expr.eval_vector_func_ = minus_collection_collection_int32_t_vector; + } else if (elem_type == ObIntType) { + SET_MINUS_FUNC_PTR(minus_collection_collection_int64_t); + rt_expr.eval_vector_func_ = minus_collection_collection_int64_t_vector; + } else if (elem_type == ObFloatType) { + SET_MINUS_FUNC_PTR(minus_collection_collection_float); + rt_expr.eval_vector_func_ = minus_collection_collection_float_vector; + } else if (elem_type == ObDoubleType) { + SET_MINUS_FUNC_PTR(minus_collection_collection_double); + rt_expr.eval_vector_func_ = minus_collection_collection_double_vector; + } else if (elem_type == ObUInt64Type) { + SET_MINUS_FUNC_PTR(minus_collection_collection_uint64_t); + rt_expr.eval_vector_func_ = minus_collection_collection_uint64_t_vector; + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("invalid element type for array operation", K(ret), K(elem_type)); + } + } + break; default: break; } @@ -1814,5 +1882,82 @@ DECINC_MINUS_EVAL_FUNC_ORA_DECL(int128) #undef DECINC_MINUS_EVAL_FUNC_ORA_DECL +template +struct ObArrayMinusFunc : public ObNestedArithOpBaseFunc +{ + int operator()(ObIArrayType &res, const ObIArrayType &l, const ObIArrayType &r) const + { + int ret = OB_SUCCESS; + + if (l.get_format() != r.get_format() || res.get_format() != r.get_format()) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("nested type is mismatch", K(ret), K(l.get_format()), K(r.get_format()), K(res.get_format())); + } else if (l.size() != r.size()) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("nested size is mismatch", K(ret), K(l.size()), K(r.size())); + } else if (l.get_format() != ArrayFormat::Vector && MEMCMP(l.get_nullbitmap(), r.get_nullbitmap(), 
sizeof(uint8_t) * l.size())) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("nested nullbitmap is mismatch", K(ret)); + } else if (l.get_format() == ArrayFormat::Nested_Array) { + // compare array dimension + const ObArrayNested &left = static_cast(l); + const ObArrayNested &right = static_cast(r); + ObArrayNested &nest_res = static_cast(res); + if (MEMCMP(left.get_nullbitmap(), right.get_nullbitmap(), sizeof(uint8_t) * left.size()) != 0) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("nested nullbitmap is mismatch", K(ret)); + } else if (MEMCMP(left.get_offsets(), right.get_offsets(), sizeof(uint32_t) * left.size()) != 0) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("nested offsets is mismatch", K(ret)); + } else if (OB_FAIL(res.set_null_bitmaps(left.get_nullbitmap(), left.size()))) { + LOG_WARN("nested nullbitmap copy failed", K(ret)); + } else if (OB_FAIL(res.set_offsets(left.get_offsets(), left.size()))) { + LOG_WARN("nested offset copy failed", K(ret)); + } else if (OB_FAIL(operator()(*nest_res.get_child_array(), *left.get_child_array(), *right.get_child_array()))) { + LOG_WARN("nested child array add failed", K(ret)); + } + } else if (l.get_format() != ArrayFormat::Fixed_Size && l.get_format() != ArrayFormat::Vector) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("invaid array type", K(ret), K(l.get_format())); + } else { + T *res_data = NULL; + if (OB_FAIL(l.get_format() != ArrayFormat::Vector && res.set_null_bitmaps(l.get_nullbitmap(), l.size()))) { + LOG_WARN("array nullbitmap copy failed", K(ret)); + } else if (OB_FAIL(static_cast &>(res).get_reserved_data(l.size(), res_data))) { + LOG_WARN("array get resered data failed", K(ret)); + } else { + T *left_data = reinterpret_cast(l.get_data()); + T *right_data = reinterpret_cast(r.get_data()); + for (int64_t i = 0; i < l.size(); ++i) { + res_data[i] = left_data[i] - right_data[i]; + } + } + } + return ret; + } +}; + +#define COLLECTION_MINUS_EVAL_FUNC_DECL(TYPE) \ +int 
ObExprMinus::minus_collection_collection_##TYPE(EVAL_FUNC_ARG_DECL) \ +{ \ + return def_arith_eval_func>>(EVAL_FUNC_ARG_LIST, expr, ctx); \ +} \ +int ObExprMinus::minus_collection_collection_##TYPE##_batch(BATCH_EVAL_FUNC_ARG_DECL) \ +{ \ + return def_batch_arith_op_by_datum_func>>(BATCH_EVAL_FUNC_ARG_LIST, expr, ctx); \ +} \ +int ObExprMinus::minus_collection_collection_##TYPE##_vector(VECTOR_EVAL_FUNC_ARG_DECL) \ +{ \ + return def_nested_vector_arith_op_func>>(VECTOR_EVAL_FUNC_ARG_LIST, expr, ctx); \ +} + +COLLECTION_MINUS_EVAL_FUNC_DECL(int8_t) +COLLECTION_MINUS_EVAL_FUNC_DECL(int16_t) +COLLECTION_MINUS_EVAL_FUNC_DECL(int32_t) +COLLECTION_MINUS_EVAL_FUNC_DECL(int64_t) +COLLECTION_MINUS_EVAL_FUNC_DECL(float) +COLLECTION_MINUS_EVAL_FUNC_DECL(double) +COLLECTION_MINUS_EVAL_FUNC_DECL(uint64_t) + } } diff --git a/src/sql/engine/expr/ob_expr_minus.h b/src/sql/engine/expr/ob_expr_minus.h index 390e6e86c9..23c266d60a 100644 --- a/src/sql/engine/expr/ob_expr_minus.h +++ b/src/sql/engine/expr/ob_expr_minus.h @@ -179,6 +179,28 @@ public: static int minus_decimalint128_oracle(EVAL_FUNC_ARG_DECL); static int minus_decimalint128_oracle_batch(BATCH_EVAL_FUNC_ARG_DECL); static int minus_decimalint128_oracle_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_int8_t(EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_int8_t_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_int8_t_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_int16_t(EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_int16_t_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_int16_t_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_int32_t(EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_int32_t_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_int32_t_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int 
minus_collection_collection_int64_t(EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_int64_t_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_int64_t_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_float(EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_float_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_float_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_double(EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_double_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_double_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_uint64_t(EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_uint64_t_batch(BATCH_EVAL_FUNC_ARG_DECL); + static int minus_collection_collection_uint64_t_vector(VECTOR_EVAL_FUNC_ARG_DECL); + public: OB_INLINE static bool is_int_int_out_of_range(int64_t val1, int64_t val2, int64_t res) diff --git a/src/sql/engine/expr/ob_expr_mul.cpp b/src/sql/engine/expr/ob_expr_mul.cpp index 6dde660a55..c5953a671c 100644 --- a/src/sql/engine/expr/ob_expr_mul.cpp +++ b/src/sql/engine/expr/ob_expr_mul.cpp @@ -18,7 +18,7 @@ #include "sql/engine/expr/ob_batch_eval_util.h" #include "sql/resolver/expr/ob_raw_expr_util.h" #include "sql/engine/expr/ob_expr_util.h" - +#include "sql/engine/expr/ob_array_expr_utils.h" using namespace oceanbase::common; @@ -52,6 +52,17 @@ int ObExprMul::calc_result_type2(ObExprResType &type, ob_is_decimal_int(type1.get_type()) && ob_is_decimal_int(type2.get_type()); const bool is_oracle = lib::is_oracle_mode(); if (OB_FAIL(ObArithExprOperator::calc_result_type2(type, type1, type2, type_ctx))) { + } else if (type.is_collection_sql_type()) { + // only support vector/array/varchar * vector/array/varchar now // array and varchar need cast to array(float) + uint16_t res_subschema_id = UINT16_MAX; + if 
(OB_FAIL(ObArrayExprUtils::calc_cast_type2(type1, type2, type_ctx, res_subschema_id))) { + LOG_WARN("failed to calc cast type", K(ret), K(type1)); + } else if (UINT16_MAX == res_subschema_id) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected result subschema_id", K(ret)); + } else { + type.set_collection(res_subschema_id); + } } else if (type.is_decimal_int() && (type1.is_null() || type2.is_null())) { type.set_precision(MAX(type1.get_precision(), type2.get_precision())); type.set_scale(MAX(type1.get_scale(), type2.get_scale())); @@ -1561,6 +1572,10 @@ int ObExprMul::cg_expr(ObExprCGCtx &op_cg_ctx, set_decimal_int_eval_func(rt_expr, false /*is_oracle*/); break; } + case ObCollectionSQLType: { + SET_MUL_FUNC_PTR(mul_vec_vec); + break; + } default: { break; } @@ -1574,5 +1589,17 @@ int ObExprMul::cg_expr(ObExprCGCtx &op_cg_ctx, } #undef SET_MUL_FUNC_PTR +int ObExprMul::mul_vec_vec(EVAL_FUNC_ARG_DECL) +{ + ObVectorArithFunc::ArithType op_type = ObVectorArithFunc::ArithType::MUL; + return def_arith_eval_func(EVAL_FUNC_ARG_LIST, expr, ctx, op_type); +} + +int ObExprMul::mul_vec_vec_batch(BATCH_EVAL_FUNC_ARG_DECL) +{ + ObVectorArithFunc::ArithType op_type = ObVectorArithFunc::ArithType::MUL; + return def_batch_arith_op_by_datum_func(BATCH_EVAL_FUNC_ARG_LIST, expr, ctx, op_type); +} + } } diff --git a/src/sql/engine/expr/ob_expr_mul.h b/src/sql/engine/expr/ob_expr_mul.h index d1a1afc61c..020b38795d 100644 --- a/src/sql/engine/expr/ob_expr_mul.h +++ b/src/sql/engine/expr/ob_expr_mul.h @@ -221,6 +221,9 @@ public: static int mul_decimalint128_int128_int128_oracle_batch(BATCH_EVAL_FUNC_ARG_DECL); static int mul_decimalint128_int128_int128_oracle_vector(VECTOR_EVAL_FUNC_ARG_DECL); + static int mul_vec_vec(EVAL_FUNC_ARG_DECL); + static int mul_vec_vec_batch(BATCH_EVAL_FUNC_ARG_DECL); + // temporary used, remove after all expr converted virtual int cg_expr(ObExprCGCtx &op_cg_ctx, const ObRawExpr &raw_expr, diff --git a/src/sql/engine/expr/ob_expr_operator.cpp 
b/src/sql/engine/expr/ob_expr_operator.cpp index ef46ef8826..4fbd8cd5b8 100644 --- a/src/sql/engine/expr/ob_expr_operator.cpp +++ b/src/sql/engine/expr/ob_expr_operator.cpp @@ -35,6 +35,7 @@ #include "sql/resolver/dml/ob_select_stmt.h" #include "share/vector/expr_cmp_func.h" #include "sql/engine/expr/ob_expr_func_round.h" +#include "sql/engine/expr/ob_array_expr_utils.h" namespace oceanbase { @@ -1366,6 +1367,10 @@ int ObExprOperator::aggregate_result_type_for_merge( if (OB_FAIL(aggregate_user_defined_sql_type(type, types, param_num))) { LOG_WARN("aggregate_user_defined_sql_type fail", K(ret)); } + } else if (ob_is_collection_sql_type(res_type)) { + if (OB_FAIL(aggregate_collection_sql_type(type, types, param_num))) { + LOG_WARN("aggregate_collection_sql_type fail", K(ret)); + } } } LOG_DEBUG("merged type is", K(type), K(is_oracle_mode)); @@ -1712,6 +1717,28 @@ int ObExprOperator::aggregate_user_defined_sql_type( return ret; } +int ObExprOperator::aggregate_collection_sql_type( + ObExprResType &type, + const ObExprResType *types, + int64_t param_num) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(types) || OB_UNLIKELY(param_num < 1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("types is null or param_num is wrong", K(types), K(param_num), K(ret)); + } else { + bool found = false; + for (int64_t i = 0; ! 
found && i < param_num && OB_SUCC(ret); ++i) { + if (ob_is_collection_sql_type(types[i].get_type())) { + found = true; + // choose the first collection subschema id now + type.set_subschema_id(types[i].get_subschema_id()); + } + } + } + return ret; +} + int ObExprDFMConvertCtx::parse_format(const ObString &format_str, const ObObjType target_type, bool check_format_semantic, @@ -2301,6 +2328,11 @@ int ObExprOperator::calc_cmp_type2(ObExprResType &type, ret = OB_ERR_INVALID_XML_DATATYPE; } LOG_WARN("incorrect cmp type with xml arguments", K(type1), K(type2), K(type_), K(ret)); + } else if ((type1.is_collection_sql_type() || type2.is_collection_sql_type()) + && !(type_ == T_OP_EQ + || type_ == T_OP_NE)) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("Incorrect cmp type with collection arguments", K(type1), K(type2), K(type_), K(ret)); } else if (OB_FAIL(ObExprResultTypeUtil::get_relational_cmp_type(cmp_type, type1.get_type(), type2.get_type()))) { @@ -2336,6 +2368,16 @@ int ObExprOperator::calc_cmp_type2(ObExprResType &type, } } else if (ObRawType == cmp_type) { type.get_calc_meta().set_collation_type(CS_TYPE_BINARY); + } else if (ob_is_collection_sql_type(cmp_type)) { + if (type1.is_collection_sql_type() && !type2.is_collection_sql_type()) { + type.get_calc_meta().set_subschema_id(type1.get_subschema_id()); + } else if (!type1.is_collection_sql_type() && type2.is_collection_sql_type()) { + type.get_calc_meta().set_subschema_id(type2.get_subschema_id()); + } else if (type1.is_collection_sql_type() && type2.is_collection_sql_type()) { + if (type1.get_subschema_id() == type2.get_subschema_id()) { + type.get_calc_meta().set_subschema_id(type1.get_subschema_id()); + } + } } LOG_DEBUG("calc cmp type", K(type1), K(type2), K(type.get_calc_meta()), K(lbt())); } @@ -2628,6 +2670,29 @@ int ObRelationalExprOperator::deduce_cmp_type(const ObExprOperator &expr, type1.set_calc_accuracy(calc_acc); type2.set_calc_accuracy(calc_acc); } + } else if 
(ob_is_collection_sql_type(cmp_type.get_calc_type())) { + if (type1.is_collection_sql_type() && !type2.is_collection_sql_type()) { + type1.set_calc_meta(type1); + type2.set_calc_meta(type1); + } else if (!type1.is_collection_sql_type() && type2.is_collection_sql_type()) { + type1.set_calc_meta(type2); + type2.set_calc_meta(type2); + } else if (type1.is_collection_sql_type() && type2.is_collection_sql_type()) { + if (type1.get_subschema_id() != type2.get_subschema_id()) { + ObExprResType coll_calc_type = type; + ObSQLSessionInfo *session = const_cast(type_ctx.get_session()); + ObExecContext *exec_ctx = session->get_cur_exec_ctx(); + if (OB_FAIL(ObExprResultTypeUtil::get_array_calc_type(exec_ctx, type1, type2, coll_calc_type))) { + LOG_WARN("failed to check array compatibilty", K(ret)); + } else { + type1.set_calc_meta(coll_calc_type); + type2.set_calc_meta(coll_calc_type); + } + } else { + type1.set_calc_meta(type2); + type2.set_calc_meta(type2); + } + } } } return ret; diff --git a/src/sql/engine/expr/ob_expr_operator.h b/src/sql/engine/expr/ob_expr_operator.h index 618b47f807..9edb0f1f08 100644 --- a/src/sql/engine/expr/ob_expr_operator.h +++ b/src/sql/engine/expr/ob_expr_operator.h @@ -40,6 +40,8 @@ #include "sql/engine/expr/ob_expr_extra_info_factory.h" #include "sql/engine/expr/ob_i_expr_extra_info.h" #include "lib/hash/ob_hashset.h" +#include "share/schema/ob_schema_struct.h" +#include "lib/udt/ob_array_type.h" #include "sql/session/ob_local_session_var.h" @@ -657,6 +659,10 @@ public: ObExprResType &type, const ObExprResType *types, int64_t param_num); + static int aggregate_collection_sql_type( + ObExprResType &type, + const ObExprResType *types, + int64_t param_num); int calc_cmp_type2(ObExprResType &type, const ObExprResType &type1, @@ -1712,6 +1718,7 @@ protected: return ret; } + virtual int calc_result_type2(ObExprResType &type, ObExprResType &type1, ObExprResType &type2, diff --git a/src/sql/engine/expr/ob_expr_operator_factory.cpp 
b/src/sql/engine/expr/ob_expr_operator_factory.cpp index 143d18c8d2..3575086130 100644 --- a/src/sql/engine/expr/ob_expr_operator_factory.cpp +++ b/src/sql/engine/expr/ob_expr_operator_factory.cpp @@ -440,6 +440,14 @@ #include "sql/engine/expr/ob_expr_st_symdifference.h" #include "sql/engine/expr/ob_expr_priv_st_asmvtgeom.h" #include "sql/engine/expr/ob_expr_priv_st_makevalid.h" +#include "sql/engine/expr/ob_expr_array.h" +#include "sql/engine/expr/ob_expr_vec_vid.h" +#include "sql/engine/expr/ob_expr_vec_type.h" +#include "sql/engine/expr/ob_expr_vec_vector.h" +#include "sql/engine/expr/ob_expr_vec_scn.h" +#include "sql/engine/expr/ob_expr_vec_key.h" +#include "sql/engine/expr/ob_expr_vec_data.h" +#include "sql/engine/expr/ob_expr_vector.h" #include "sql/engine/expr/ob_expr_inner_table_option_printer.h" #include "sql/engine/expr/ob_expr_rb_build_empty.h" #include "sql/engine/expr/ob_expr_rb_is_empty.h" @@ -450,6 +458,7 @@ #include "sql/engine/expr/ob_expr_rb_calc.h" #include "sql/engine/expr/ob_expr_rb_to_string.h" #include "sql/engine/expr/ob_expr_rb_from_string.h" +#include "sql/engine/expr/ob_expr_array_contains.h" #include "sql/engine/expr/ob_expr_lock_func.h" #include "sql/engine/expr/ob_expr_decode_trace_id.h" #include "sql/engine/expr/ob_expr_topn_filter.h" @@ -1097,6 +1106,21 @@ void ObExprOperatorFactory::register_expr_operators() REG_OP(ObExprPrivSTAsMVTGeom); REG_OP(ObExprPrivSTMakeValid); REG_OP(ObExprCurrentRole); + REG_OP(ObExprArray); + /* vector index */ + REG_OP(ObExprVecVid); + REG_OP(ObExprVecType); + REG_OP(ObExprVecVector); + REG_OP(ObExprVecScn); + REG_OP(ObExprVecKey); + REG_OP(ObExprVecData); + REG_OP(ObExprVectorL2Distance); + REG_OP(ObExprVectorCosineDistance); + REG_OP(ObExprVectorIPDistance); + REG_OP(ObExprVectorL1Distance); + REG_OP(ObExprVectorDims); + REG_OP(ObExprVectorNorm); + REG_OP(ObExprVectorDistance); REG_OP(ObExprInnerTableOptionPrinter); REG_OP(ObExprInnerTableSequenceGetter); REG_OP(ObExprRbBuildEmpty); @@ -1121,6 +1145,7 
@@ void ObExprOperatorFactory::register_expr_operators() REG_OP(ObExprRbToString); REG_OP(ObExprRbFromString); REG_OP(ObExprGetPath); + REG_OP(ObExprArrayContains); REG_OP(ObExprDecodeTraceId); REG_OP(ObExprAuditLogSetFilter); REG_OP(ObExprAuditLogRemoveFilter); @@ -1575,6 +1600,18 @@ void ObExprOperatorFactory::get_function_alias_name(const ObString &origin_name, // don't alias "power" to "pow" in oracle mode, because oracle has no // "pow" function. alias_name = ObString::make_string(N_POW); + } else if (0 == origin_name.case_compare("VEC_VID")) { + alias_name = ObString::make_string(N_VEC_VID); + } else if (0 == origin_name.case_compare("VEC_TYPE")) { + alias_name = ObString::make_string(N_VEC_TYPE); + } else if (0 == origin_name.case_compare("VEC_VECTOR")) { + alias_name = ObString::make_string(N_VEC_VECTOR); + } else if (0 == origin_name.case_compare("VEC_SCN")) { + alias_name = ObString::make_string(N_VEC_SCN); + } else if (0 == origin_name.case_compare("VEC_KEY")) { + alias_name = ObString::make_string(N_VEC_KEY); + } else if (0 == origin_name.case_compare("VEC_DATA")) { + alias_name = ObString::make_string(N_VEC_DATA); } else if (0 == origin_name.case_compare("DOC_ID")) { alias_name = ObString::make_string(N_DOC_ID); } else if (0 == origin_name.case_compare("ws")) { diff --git a/src/sql/engine/expr/ob_expr_relational_cmp_type.map b/src/sql/engine/expr/ob_expr_relational_cmp_type.map index 2c1016cc6b..ff267f34a8 100644 --- a/src/sql/engine/expr/ob_expr_relational_cmp_type.map +++ b/src/sql/engine/expr/ob_expr_relational_cmp_type.map @@ -71,7 +71,7 @@ static constexpr ObObjType RELATIONAL_CMP_TYPE[ObMaxType][ObMaxType] = ObHexStringType, /* ObGeometryType */ ObMaxType, /* UDT */ ObDecimalIntType, /* ObDecimalIntType */ - ObMaxType, /* COLLECTION */ + ObCollectionSQLType, /* COLLECTION */ ObMySQLDateType, /* MySQLDateType */ ObMySQLDateTimeType, /* MySQLDateTimeType */ ObHexStringType, /* RoaringBitmap */ @@ -1371,7 +1371,7 @@ static constexpr ObObjType 
RELATIONAL_CMP_TYPE[ObMaxType][ObMaxType] = ObHexStringType, /* ObGeometryType */ ObMaxType, /* UDT */ ObDecimalIntType, /* ObDecimalIntType */ - ObMaxType, /* COLLECTION */ + ObCollectionSQLType, /* COLLECTION */ ObMySQLDateTimeType, /* MySQLDateType */ ObMySQLDateTimeType, /* MySQLDateTimeType */ ObMaxType, /* RoaringBitmap */ @@ -3009,7 +3009,7 @@ static constexpr ObObjType RELATIONAL_CMP_TYPE[ObMaxType][ObMaxType] = }, /*ObCollectionSQLType*/ { - ObMaxType, /* NullType */ + ObCollectionSQLType, /* NullType */ ObMaxType, /* TinyIntType */ ObMaxType, /* SmallIntType */ ObMaxType, /* MediumIntType */ @@ -3031,7 +3031,7 @@ static constexpr ObObjType RELATIONAL_CMP_TYPE[ObMaxType][ObMaxType] = ObMaxType, /* DateType */ ObMaxType, /* TimeType */ ObMaxType, /* YearType */ - ObMaxType, /* VarcharType */ + ObCollectionSQLType, /* VarcharType */ ObMaxType, /* CharType */ ObMaxType, /* HexStringType */ ObMaxType, /* ExtendType */ @@ -3060,7 +3060,7 @@ static constexpr ObObjType RELATIONAL_CMP_TYPE[ObMaxType][ObMaxType] = ObMaxType, /* ObGeometryType */ ObMaxType, /* UDT */ ObMaxType, /* ObDecimalIntType */ - ObMaxType, /* COLLECTION */ + ObCollectionSQLType, /* COLLECTION */ ObMaxType, /* MySQLDateType */ ObMaxType, /* MySQLDateTimeType */ ObMaxType, /* RoaringBitmap */ diff --git a/src/sql/engine/expr/ob_expr_res_type.h b/src/sql/engine/expr/ob_expr_res_type.h index 45c235cbbd..915bfb3a5a 100644 --- a/src/sql/engine/expr/ob_expr_res_type.h +++ b/src/sql/engine/expr/ob_expr_res_type.h @@ -275,6 +275,7 @@ public: OB_INLINE bool is_not_null_for_write() const { return has_result_flag(NOT_NULL_WRITE_FLAG); } // calc_type: 表示表达式计算时,表达式将转换成calc_type后再计算 OB_INLINE void set_calc_type(const common::ObObjType &type) { calc_type_.set_type(type); } + OB_INLINE void set_calc_subschema_id(const uint16_t subschema_id) { calc_type_.set_subschema_id(subschema_id); } OB_INLINE void set_calc_collation_utf8() { set_calc_collation_by_charset(common::CHARSET_UTF8MB4); diff --git 
a/src/sql/engine/expr/ob_expr_result_type_util.cpp b/src/sql/engine/expr/ob_expr_result_type_util.cpp index 1284c4e62c..e8d17dbc7b 100644 --- a/src/sql/engine/expr/ob_expr_result_type_util.cpp +++ b/src/sql/engine/expr/ob_expr_result_type_util.cpp @@ -17,6 +17,7 @@ #include "sql/engine/expr/ob_expr_result_type_util.h" #include "sql/engine/expr/ob_expr_res_type_map.h" #include "sql/session/ob_sql_session_info.h" +#include "sql/engine/expr/ob_array_expr_utils.h" namespace oceanbase { @@ -851,5 +852,90 @@ int ObExprResultTypeUtil::deduce_max_string_length_oracle(const ObDataTypeCastPa return ret; } +int ObExprResultTypeUtil::get_array_calc_type(ObExecContext *exec_ctx, + const ObExprResType &type1, + const ObExprResType &type2, + ObExprResType &calc_type) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(exec_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("exec ctx is null", K(ret)); + } else { + uint32_t depth = 0; + bool is_compatiable = false; + ObDataType coll_elem1_type; + ObDataType coll_elem2_type; + bool l_is_vec = false; + bool r_is_vec = false; + ObObjType element_type; + if (OB_FAIL(ObArrayExprUtils::check_array_type_compatibility(exec_ctx, type1.get_subschema_id(), + type2.get_subschema_id(), is_compatiable))) { + LOG_WARN("failed to check array compatibilty", K(ret)); + } else if (!is_compatiable) { + ret = OB_ERR_ARRAY_TYPE_MISMATCH; + LOG_WARN("nested type is mismatch", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::get_array_element_type(exec_ctx, type1.get_subschema_id(), coll_elem1_type, depth, l_is_vec))) { + LOG_WARN("failed to get array element type", K(ret)); + } else if (OB_FAIL(ObArrayExprUtils::get_array_element_type(exec_ctx, type2.get_subschema_id(), coll_elem2_type, depth, r_is_vec))) { + LOG_WARN("failed to get array element type", K(ret)); + } else if (l_is_vec || r_is_vec) { + // cast to vec + l_is_vec ? 
calc_type.set_collection(type1.get_subschema_id()) : calc_type.set_collection(type2.get_subschema_id()); + } else if (coll_elem1_type.get_obj_type() == coll_elem2_type.get_obj_type() && + coll_elem1_type.get_obj_type() == ObVarcharType) { + // use subschema_id whose length is greater + if (coll_elem1_type.get_length() > coll_elem2_type.get_length()) { + calc_type.set_collection(type1.get_subschema_id()); + } else { + calc_type.set_collection(type2.get_subschema_id()); + } + } else if (OB_FAIL(get_array_calc_type(exec_ctx, coll_elem1_type.get_obj_type(), coll_elem2_type.get_obj_type(), + depth, calc_type, element_type))) { + LOG_WARN("failed to get array calc type", K(ret)); + } + } + return ret; +} + +int ObExprResultTypeUtil::get_array_calc_type(ObExecContext *exec_ctx, + const ObObjType &type1, + const ObObjType &type2, + uint32_t depth, + ObExprResType &calc_type, + ObObjType &element_type) +{ + int ret = OB_SUCCESS; + ObObjType coll_calc_type = ARITH_RESULT_TYPE[type1][type2]; + if (ob_is_int_uint(ob_obj_type_class(type1), ob_obj_type_class(type2))) { + coll_calc_type = ObIntType; + } else if (type1 == ObFloatType && type2 == ObFloatType) { + coll_calc_type = ObFloatType; + } else if (ob_is_null(type1)) { + coll_calc_type = type2; + } else if (ob_is_null(type2)) { + coll_calc_type = type1; + } + ObDataType elem_data; + uint16_t subschema_id; + elem_data.set_obj_type(coll_calc_type); + const int MAX_LEN = 256; + char type_name[MAX_LEN] = {0}; + ObString type_info; + if (coll_calc_type == ObMaxType) { + ret = OB_ERR_INVALID_TYPE_FOR_OP; + LOG_WARN("invalid subschema type", K(ret), K(type1), K(type2)); + } else if (OB_FAIL(ObArrayUtil::get_type_name(elem_data, type_name, MAX_LEN, depth))) { + LOG_WARN("failed to convert len to string", K(ret)); + } else if (FALSE_IT(type_info.assign_ptr(type_name, strlen(type_name)))) { + } else if (OB_FAIL(exec_ctx->get_subschema_id_by_type_string(type_info, subschema_id))) { + LOG_WARN("failed get subschema id", K(ret), 
K(type_info)); + } else { + calc_type.set_collection(subschema_id); + element_type = coll_calc_type; + } + return ret; +} + } /* sql */ } /* oceanbase */ diff --git a/src/sql/engine/expr/ob_expr_result_type_util.h b/src/sql/engine/expr/ob_expr_result_type_util.h index f9997d5fa7..e94addbc42 100644 --- a/src/sql/engine/expr/ob_expr_result_type_util.h +++ b/src/sql/engine/expr/ob_expr_result_type_util.h @@ -222,6 +222,16 @@ public: return get_arith_calc_type(calc_type, calc_ob1_type, calc_ob2_type, type1, type2, ObArithResultTypeMap::OP::MOD); } + static int get_array_calc_type(ObExecContext *exec_ctx, + const ObExprResType &type1, + const ObExprResType &type2, + ObExprResType &calc_type); + static int get_array_calc_type(ObExecContext *exec_ctx, + const ObObjType &type1, + const ObObjType &type2, + uint32_t depth, + ObExprResType &calc_type, + ObObjType &element_type); }; diff --git a/src/sql/engine/expr/ob_expr_sql_udt_utils.cpp b/src/sql/engine/expr/ob_expr_sql_udt_utils.cpp index 5dff52a1dd..62d04e85cd 100644 --- a/src/sql/engine/expr/ob_expr_sql_udt_utils.cpp +++ b/src/sql/engine/expr/ob_expr_sql_udt_utils.cpp @@ -19,6 +19,7 @@ #include "pl/ob_pl.h" #include "pl/ob_pl_user_type.h" #include "src/pl/ob_pl_resolver.h" +#include "lib/udt/ob_array_type.h" using namespace oceanbase::common; using namespace oceanbase::sql; @@ -873,6 +874,35 @@ int ObSqlUdtUtils::convert_sql_udt_to_string(ObObj &sql_udt_obj, return ret; } +int ObSqlUdtUtils::convert_collection_to_string(ObObj &coll_obj, const ObSqlCollectionInfo &coll_meta, + common::ObIAllocator *allocator, ObString &res_str) +{ + int ret = OB_SUCCESS; + ObIArrayType *arr_obj = NULL; + ObString coll_data = coll_obj.get_string(); + ObStringBuffer buf(allocator); + ObArenaAllocator lob_allocator(ObModIds::OB_LOB_ACCESS_BUFFER, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObCollectionArrayType *arr_type = static_cast(coll_meta.collection_meta_); + if (OB_FAIL(ObTextStringHelper::read_real_string_data(&lob_allocator, + 
ObLongTextType, + CS_TYPE_BINARY, + true, coll_data))) { + LOG_WARN("fail to get real string data", K(ret), K(coll_data)); + } else if (OB_FAIL(ObArrayTypeObjFactory::construct(*allocator, *arr_type, arr_obj, true))) { + LOG_WARN("construct array obj failed", K(ret), K(coll_meta)); + } else { + if (OB_FAIL(arr_obj->init(coll_data))) { + LOG_WARN("failed to init array", K(ret)); + } else if (OB_FAIL(arr_obj->print(arr_type->element_type_, buf))) { + LOG_WARN("failed to format array", K(ret)); + } else { + res_str.assign_ptr(buf.ptr(), buf.length()); + } + } + + return ret; +} + int ObSqlUdtUtils::cast_pl_varray_to_sql_varray(common::ObIAllocator &res_allocator, ObString &res, const ObObj root_obj, diff --git a/src/sql/engine/expr/ob_expr_sql_udt_utils.h b/src/sql/engine/expr/ob_expr_sql_udt_utils.h index 1d8b1320dc..12f1337030 100644 --- a/src/sql/engine/expr/ob_expr_sql_udt_utils.h +++ b/src/sql/engine/expr/ob_expr_sql_udt_utils.h @@ -145,6 +145,8 @@ public: sql::ObExecContext *exec_context, ObSqlUDT &sql_udt, ObString &res_str); + static int convert_collection_to_string(ObObj &coll_obj, const ObSqlCollectionInfo &coll_meta, + common::ObIAllocator *allocator, ObString &res_str); static bool ob_udt_util_check_same_type(ObObjType type1, ObObjType type2) { diff --git a/src/sql/engine/expr/ob_expr_vec_data.cpp b/src/sql/engine/expr/ob_expr_vec_data.cpp new file mode 100644 index 0000000000..8e5bee2bc7 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_vec_data.cpp @@ -0,0 +1,80 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+ * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX COMMON + +#include "sql/engine/expr/ob_expr_vec_data.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ +ObExprVecData::ObExprVecData(ObIAllocator &allocator) + : ObFuncExprOperator(allocator, T_FUN_SYS_VEC_DATA, N_VEC_DATA, MORE_THAN_ZERO, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) +{ + need_charset_convert_ = false; +} + +int ObExprVecData::calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + ObExprTypeCtx &type_ctx) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(param_num < 1) || OB_ISNULL(types)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument for fulltext expr", K(ret), K(param_num), KP(types)); + } else { + type.set_blob(); + type.set_length(OB_MAX_LONGTEXT_LENGTH); + } + return ret;} + +int ObExprVecData::calc_resultN(ObObj &result, + const ObObj *objs_array, + int64_t param_num, + ObExprCtx &expr_ctx) const +{ + return OB_NOT_SUPPORTED; +} + +int ObExprVecData::cg_expr( + ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const +{ + int ret = OB_SUCCESS; + UNUSED(raw_expr); + UNUSED(expr_cg_ctx); + if (OB_UNLIKELY(rt_expr.arg_cnt_ < 1) || OB_ISNULL(rt_expr.args_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(rt_expr.arg_cnt_), KP(rt_expr.args_), K(rt_expr.type_)); + } else { + rt_expr.eval_func_ = generate_vec_data; + } + return ret; +} + +/*static*/ int ObExprVecData::generate_vec_data( + const ObExpr &raw_ctx, + ObEvalCtx &eval_ctx, + ObDatum &expr_datum) +{ + int ret = OB_SUCCESS; + UNUSEDx(raw_ctx, eval_ctx); + expr_datum.set_null(); + return ret; +} + +} // namespace sql +} // namespace oceanbase diff --git a/src/sql/engine/expr/ob_expr_vec_data.h b/src/sql/engine/expr/ob_expr_vec_data.h new file mode 100644 index 0000000000..7647a0d4ec --- /dev/null +++ b/src/sql/engine/expr/ob_expr_vec_data.h @@ -0,0 +1,49 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is 
licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_DATA_H_ +#define OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_DATA_H_ +#include "sql/engine/expr/ob_expr_operator.h" + +namespace oceanbase +{ +namespace sql +{ +class ObExprVecData : public ObFuncExprOperator +{ +public: + explicit ObExprVecData(common::ObIAllocator &alloc); + virtual ~ObExprVecData() {} + virtual int calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + common::ObExprTypeCtx &type_ctx) const override; + virtual int calc_resultN(common::ObObj &result, + const common::ObObj *objs_array, + int64_t param_num, + common::ObExprCtx &expr_ctx) const override; + virtual common::ObCastMode get_cast_mode() const override { return CM_NULL_ON_WARN;} + virtual int cg_expr( + ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const override; + static int generate_vec_data( + const ObExpr &raw_ctx, + ObEvalCtx &eval_ctx, + ObDatum &expr_datum); +private : + //disallow copy + DISALLOW_COPY_AND_ASSIGN(ObExprVecData); +}; +} +} +#endif /* OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_DATA_H_ */ diff --git a/src/sql/engine/expr/ob_expr_vec_key.cpp b/src/sql/engine/expr/ob_expr_vec_key.cpp new file mode 100644 index 0000000000..79f2e87367 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_vec_key.cpp @@ -0,0 +1,83 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX COMMON + +#include "sql/engine/expr/ob_expr_vec_key.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ +ObExprVecKey::ObExprVecKey(ObIAllocator &allocator) + : ObFuncExprOperator(allocator, T_FUN_SYS_VEC_KEY, N_VEC_KEY, MORE_THAN_ZERO, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) +{ + need_charset_convert_ = false; +} + +int ObExprVecKey::calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + ObExprTypeCtx &type_ctx) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(param_num < 1) || OB_ISNULL(types)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument for fulltext expr", K(ret), K(param_num), KP(types)); + } else { + type.set_varchar(); + type.set_length(OB_MAX_VARCHAR_LENGTH); + type.set_default_collation_type(); + type.set_collation_level(common::CS_LEVEL_IMPLICIT); + } + return ret; +} + +int ObExprVecKey::calc_resultN(ObObj &result, + const ObObj *objs_array, + int64_t param_num, + ObExprCtx &expr_ctx) const +{ + return OB_NOT_SUPPORTED; +} + +int ObExprVecKey::cg_expr( + ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const +{ + int ret = OB_SUCCESS; + UNUSED(raw_expr); + UNUSED(expr_cg_ctx); + if (OB_UNLIKELY(rt_expr.arg_cnt_ < 1) || OB_ISNULL(rt_expr.args_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(rt_expr.arg_cnt_), KP(rt_expr.args_), K(rt_expr.type_)); + } else { + rt_expr.eval_func_ = generate_vec_key; + } + return ret; +} + +/*static*/ int ObExprVecKey::generate_vec_key( + const ObExpr &raw_ctx, + ObEvalCtx &eval_ctx, + ObDatum &expr_datum) +{ + int ret = OB_SUCCESS; + 
UNUSEDx(raw_ctx, eval_ctx); + expr_datum.set_null(); + return ret; +} + +} // namespace sql +} // namespace oceanbase diff --git a/src/sql/engine/expr/ob_expr_vec_key.h b/src/sql/engine/expr/ob_expr_vec_key.h new file mode 100644 index 0000000000..29d09922f9 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_vec_key.h @@ -0,0 +1,49 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_KEY_H_ +#define OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_KEY_H_ +#include "sql/engine/expr/ob_expr_operator.h" + +namespace oceanbase +{ +namespace sql +{ +class ObExprVecKey : public ObFuncExprOperator +{ +public: + explicit ObExprVecKey(common::ObIAllocator &alloc); + virtual ~ObExprVecKey() {} + virtual int calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + common::ObExprTypeCtx &type_ctx) const override; + virtual int calc_resultN(common::ObObj &result, + const common::ObObj *objs_array, + int64_t param_num, + common::ObExprCtx &expr_ctx) const override; + virtual common::ObCastMode get_cast_mode() const override { return CM_NULL_ON_WARN;} + virtual int cg_expr( + ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const override; + static int generate_vec_key( + const ObExpr &raw_ctx, + ObEvalCtx &eval_ctx, + ObDatum &expr_datum); +private : + //disallow copy + DISALLOW_COPY_AND_ASSIGN(ObExprVecKey); +}; +} +} +#endif /* OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_KEY_H_ */ diff --git 
a/src/sql/engine/expr/ob_expr_vec_scn.cpp b/src/sql/engine/expr/ob_expr_vec_scn.cpp new file mode 100644 index 0000000000..2593837d42 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_vec_scn.cpp @@ -0,0 +1,80 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX COMMON + +#include "sql/engine/expr/ob_expr_vec_scn.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ +ObExprVecScn::ObExprVecScn(ObIAllocator &allocator) + : ObFuncExprOperator(allocator, T_FUN_SYS_VEC_SCN, N_VEC_SCN, MORE_THAN_ZERO, VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION) +{ + need_charset_convert_ = false; +} + +int ObExprVecScn::calc_result_typeN(ObExprResType &type, + ObExprResType *types, + int64_t param_num, + ObExprTypeCtx &type_ctx) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(param_num < 1) || OB_ISNULL(types)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument for fulltext expr", K(ret), K(param_num), KP(types)); + } else { + type.set_int(); + } + return ret; +} + +int ObExprVecScn::calc_resultN(ObObj &result, + const ObObj *objs_array, + int64_t param_num, + ObExprCtx &expr_ctx) const +{ + return OB_NOT_SUPPORTED; +} + +int ObExprVecScn::cg_expr( + ObExprCGCtx &expr_cg_ctx, + const ObRawExpr &raw_expr, + ObExpr &rt_expr) const +{ + int ret = OB_SUCCESS; + UNUSED(raw_expr); + UNUSED(expr_cg_ctx); + if (OB_UNLIKELY(rt_expr.arg_cnt_ < 1) || OB_ISNULL(rt_expr.args_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(rt_expr.arg_cnt_), 
KP(rt_expr.args_), K(rt_expr.type_)); + } else { + rt_expr.eval_func_ = generate_vec_scn; + } + return ret; +} + +/*static*/ int ObExprVecScn::generate_vec_scn( + const ObExpr &raw_ctx, + ObEvalCtx &eval_ctx, + ObDatum &expr_datum) +{ + int ret = OB_SUCCESS; + UNUSEDx(raw_ctx, eval_ctx); + expr_datum.set_null(); + return ret; +} + +} // namespace sql +} // namespace oceanbase diff --git a/src/sql/engine/expr/ob_expr_vec_scn.h b/src/sql/engine/expr/ob_expr_vec_scn.h new file mode 100644 index 0000000000..c1fc225cd9 --- /dev/null +++ b/src/sql/engine/expr/ob_expr_vec_scn.h @@ -0,0 +1,50 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */
+
+#ifndef OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_SCN_H_
+#define OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_SCN_H_
+#include "sql/engine/expr/ob_expr_operator.h"
+
+namespace oceanbase
+{
+namespace sql
+{
+// Function expression operator for the vector-index "scn" expression.
+class ObExprVecScn : public ObFuncExprOperator
+{
+public:
+  explicit ObExprVecScn(common::ObIAllocator &alloc);
+  virtual ~ObExprVecScn() {}
+
+  // Result-type deduction for a variable number of arguments.
+  int calc_result_typeN(ObExprResType &type, ObExprResType *types, int64_t param_num,
+                        common::ObExprTypeCtx &type_ctx) const override;
+
+  // Object-based evaluation entry point.
+  int calc_resultN(common::ObObj &result, const common::ObObj *objs_array, int64_t param_num,
+                   common::ObExprCtx &expr_ctx) const override;
+
+  // Casts performed for this expression use CM_NULL_ON_WARN.
+  common::ObCastMode get_cast_mode() const override { return CM_NULL_ON_WARN; }
+
+  // Code generation hook: installs the evaluation function on rt_expr.
+  int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override;
+
+  // Evaluation function installed by cg_expr().
+  static int generate_vec_scn(const ObExpr &raw_ctx, ObEvalCtx &eval_ctx, ObDatum &expr_datum);
+
+private:
+  // disallow copy
+  DISALLOW_COPY_AND_ASSIGN(ObExprVecScn);
+};
+} // namespace sql
+} // namespace oceanbase
+#endif /* OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_SCN_H_ */
diff --git a/src/sql/engine/expr/ob_expr_vec_type.cpp b/src/sql/engine/expr/ob_expr_vec_type.cpp
new file mode 100644
index 0000000000..aa364da24e
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_vec_type.cpp
@@ -0,0 +1,83 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX COMMON
+
+#include "sql/engine/expr/ob_expr_vec_type.h"
+
+namespace oceanbase
+{
+using namespace common;
+namespace sql
+{
+// Registers the operator as T_FUN_SYS_VEC_TYPE / N_VEC_TYPE, accepting one or
+// more arguments (MORE_THAN_ZERO), and switches off charset conversion.
+ObExprVecType::ObExprVecType(ObIAllocator &allocator)
+    : ObFuncExprOperator(allocator, T_FUN_SYS_VEC_TYPE, N_VEC_TYPE, MORE_THAN_ZERO,
+                         VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION)
+{
+  need_charset_convert_ = false;
+}
+
+// Deduces the result type: a zero-length CHAR with the default collation at
+// implicit collation level.
+// Returns OB_INVALID_ARGUMENT when no argument types are supplied.
+int ObExprVecType::calc_result_typeN(ObExprResType &type,
+                                     ObExprResType *types,
+                                     int64_t param_num,
+                                     ObExprTypeCtx &type_ctx) const
+{
+  int ret = OB_SUCCESS;
+  UNUSED(type_ctx);
+  if (OB_UNLIKELY(param_num < 1) || OB_ISNULL(types)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument for vec type expr", K(ret), K(param_num), KP(types));
+  } else {
+    type.set_char();
+    type.set_length(0);
+    type.set_default_collation_type();
+    type.set_collation_level(common::CS_LEVEL_IMPLICIT);
+  }
+  return ret;
+}
+
+// This evaluation entry point is not implemented for this expression;
+// it unconditionally reports OB_NOT_SUPPORTED.
+int ObExprVecType::calc_resultN(ObObj &result,
+                                const ObObj *objs_array,
+                                int64_t param_num,
+                                ObExprCtx &expr_ctx) const
+{
+  UNUSEDx(result, objs_array, param_num, expr_ctx);
+  return OB_NOT_SUPPORTED;
+}
+
+// Binds generate_vec_type as the evaluation function after checking that the
+// runtime expression carries at least one argument.
+int ObExprVecType::cg_expr(
+    ObExprCGCtx &expr_cg_ctx,
+    const ObRawExpr &raw_expr,
+    ObExpr &rt_expr) const
+{
+  int ret = OB_SUCCESS;
+  UNUSED(raw_expr);
+  UNUSED(expr_cg_ctx);
+  if (OB_UNLIKELY(rt_expr.arg_cnt_ < 1) || OB_ISNULL(rt_expr.args_)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), K(rt_expr.arg_cnt_), KP(rt_expr.args_), K(rt_expr.type_));
+  } else {
+    rt_expr.eval_func_ = generate_vec_type;
+  }
+  return ret;
+}
+
+// Evaluation function: ignores its inputs and always produces SQL NULL.
+/*static*/ int ObExprVecType::generate_vec_type(
+    const ObExpr &raw_ctx,
+    ObEvalCtx &eval_ctx,
+    ObDatum &expr_datum)
+{
+  int ret = OB_SUCCESS;
+  UNUSEDx(raw_ctx, eval_ctx);
+  expr_datum.set_null();
+  return ret;
+}
+
+} // namespace sql
+} // namespace oceanbase
diff --git a/src/sql/engine/expr/ob_expr_vec_type.h b/src/sql/engine/expr/ob_expr_vec_type.h
new file mode 100644
index 0000000000..8fbbf2e1fc
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_vec_type.h
@@ -0,0 +1,49 @@
+/**
+ * Copyright (c)
2023 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_TYPE_H_
+#define OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_TYPE_H_
+#include "sql/engine/expr/ob_expr_operator.h"
+
+namespace oceanbase
+{
+namespace sql
+{
+// Function expression operator for the vector-index "type" expression.
+class ObExprVecType : public ObFuncExprOperator
+{
+public:
+  explicit ObExprVecType(common::ObIAllocator &alloc);
+  virtual ~ObExprVecType() {}
+
+  // Result-type deduction for a variable number of arguments.
+  int calc_result_typeN(ObExprResType &type, ObExprResType *types, int64_t param_num,
+                        common::ObExprTypeCtx &type_ctx) const override;
+
+  // Object-based evaluation entry point.
+  int calc_resultN(common::ObObj &result, const common::ObObj *objs_array, int64_t param_num,
+                   common::ObExprCtx &expr_ctx) const override;
+
+  // Casts performed for this expression use CM_NULL_ON_WARN.
+  common::ObCastMode get_cast_mode() const override { return CM_NULL_ON_WARN; }
+
+  // Code generation hook: installs the evaluation function on rt_expr.
+  int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override;
+
+  // Evaluation function installed by cg_expr().
+  static int generate_vec_type(const ObExpr &raw_ctx, ObEvalCtx &eval_ctx, ObDatum &expr_datum);
+
+private:
+  // disallow copy
+  DISALLOW_COPY_AND_ASSIGN(ObExprVecType);
+};
+} // namespace sql
+} // namespace oceanbase
+#endif /* OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_TYPE_H_ */
diff --git a/src/sql/engine/expr/ob_expr_vec_vector.cpp b/src/sql/engine/expr/ob_expr_vec_vector.cpp
new file mode 100644
index 0000000000..7a76b474e8
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_vec_vector.cpp
@@ -0,0 +1,100 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX COMMON
+
+#include "sql/engine/expr/ob_expr_vec_vector.h"
+#include "lib/udt/ob_collection_type.h"
+#include "lib/udt/ob_array_type.h"
+#include "sql/engine/expr/ob_expr_lob_utils.h"
+#include "sql/engine/expr/ob_array_expr_utils.h"
+#include "sql/engine/ob_exec_context.h"
+
+namespace oceanbase
+{
+using namespace common;
+namespace sql
+{
+// Registers the operator as T_FUN_SYS_VEC_VECTOR / N_VEC_VECTOR, accepting one
+// or more arguments (MORE_THAN_ZERO), and switches off charset conversion.
+ObExprVecVector::ObExprVecVector(ObIAllocator &allocator)
+    : ObFuncExprOperator(allocator, T_FUN_SYS_VEC_VECTOR, N_VEC_VECTOR, MORE_THAN_ZERO,
+                         VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION)
+{
+  need_charset_convert_ = false;
+}
+
+// Deduces the result type: a collection whose subschema id is looked up on the
+// current execution context for a vector (OB_VECTOR_TYPE) of float elements.
+// Returns OB_ERR_UNEXPECTED when no execution context is reachable through the
+// session stored in type_ctx.
+int ObExprVecVector::calc_result_typeN(ObExprResType &type,
+                                       ObExprResType *types,
+                                       int64_t param_num,
+                                       ObExprTypeCtx &type_ctx) const
+{
+  int ret = OB_SUCCESS;
+  UNUSEDx(types, param_num);
+  // get_cur_exec_ctx() is called through a non-const session pointer.
+  ObSQLSessionInfo *session = const_cast<ObSQLSessionInfo *>(type_ctx.get_session());
+  ObExecContext *exec_ctx = OB_ISNULL(session) ? NULL : session->get_cur_exec_ctx();
+  ObDataType elem_type;
+  elem_type.meta_.set_float();
+  uint16_t subschema_id = UINT16_MAX; // overwritten by the lookup below on success
+  if (OB_ISNULL(exec_ctx)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("exec ctx is null", K(ret));
+  } else if (OB_FAIL(exec_ctx->get_subschema_id_by_collection_elem_type(ObNestedType::OB_VECTOR_TYPE,
+                                                                        elem_type, subschema_id))) {
+    LOG_WARN("failed to get collection subschema id", K(ret));
+  } else {
+    type.set_collection(subschema_id);
+  }
+  return ret;
+}
+
+// This evaluation entry point is not implemented for this expression;
+// it unconditionally reports OB_NOT_SUPPORTED.
+int ObExprVecVector::calc_resultN(ObObj &result,
+                                  const ObObj *objs_array,
+                                  int64_t param_num,
+                                  ObExprCtx &expr_ctx) const
+{
+  UNUSEDx(result, objs_array, param_num, expr_ctx);
+  return OB_NOT_SUPPORTED;
+}
+
+// Binds generate_vec_vector as the evaluation function after checking that the
+// runtime expression carries at least one argument.
+int ObExprVecVector::cg_expr(
+    ObExprCGCtx &expr_cg_ctx,
+    const ObRawExpr &raw_expr,
+    ObExpr &rt_expr) const
+{
+  int ret = OB_SUCCESS;
+  UNUSED(raw_expr);
+  UNUSED(expr_cg_ctx);
+  if (OB_UNLIKELY(rt_expr.arg_cnt_ < 1) || OB_ISNULL(rt_expr.args_)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), K(rt_expr.arg_cnt_), KP(rt_expr.args_), K(rt_expr.type_));
+  } else {
+    rt_expr.eval_func_ = generate_vec_vector;
+  }
+  return ret;
+}
+
+// Evaluation function: evaluates the first argument and forwards its payload
+// unchanged as the result. A NULL argument yields a NULL result.
+/*static*/ int ObExprVecVector::generate_vec_vector(
+    const ObExpr &raw_ctx,
+    ObEvalCtx &eval_ctx,
+    ObDatum &expr_datum)
+{
+  int ret = OB_SUCCESS;
+  ObDatum *datum = nullptr;
+  if (OB_FAIL(raw_ctx.args_[0]->eval(eval_ctx, datum))) {
+    LOG_WARN("fail to eval arg expr", K(ret), KPC(raw_ctx.args_[0]));
+  } else if (OB_ISNULL(datum)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get null datum", K(ret), KPC(raw_ctx.args_[0]));
+  } else if (datum->is_null()) {
+    // Copying a NULL datum through set_string() would drop its NULL-ness.
+    expr_datum.set_null();
+  } else {
+    expr_datum.set_string(datum->get_string());
+  }
+  return ret;
+}
+
+} // namespace sql
+} // namespace oceanbase
diff --git a/src/sql/engine/expr/ob_expr_vec_vector.h b/src/sql/engine/expr/ob_expr_vec_vector.h
new file mode 100644
index 0000000000..434ce4deaa
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_vec_vector.h
@@ -0,0 +1,50 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_VECTOR_H_
+#define OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_VECTOR_H_
+#include "sql/engine/expr/ob_expr_operator.h"
+
+namespace oceanbase
+{
+namespace sql
+{
+// Function expression operator for the vector-index "vector" expression.
+class ObExprVecVector : public ObFuncExprOperator
+{
+public:
+  explicit ObExprVecVector(common::ObIAllocator &alloc);
+  virtual ~ObExprVecVector() {}
+
+  // Result-type deduction for a variable number of arguments.
+  int calc_result_typeN(ObExprResType &type, ObExprResType *types, int64_t param_num,
+                        common::ObExprTypeCtx &type_ctx) const override;
+
+  // Object-based evaluation entry point.
+  int calc_resultN(common::ObObj &result, const common::ObObj *objs_array, int64_t param_num,
+                   common::ObExprCtx &expr_ctx) const override;
+
+  // Casts performed for this expression use CM_NULL_ON_WARN.
+  common::ObCastMode get_cast_mode() const override { return CM_NULL_ON_WARN; }
+
+  // Code generation hook: installs the evaluation function on rt_expr.
+  int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override;
+
+  // Evaluation function installed by cg_expr().
+  static int generate_vec_vector(const ObExpr &raw_ctx, ObEvalCtx &eval_ctx, ObDatum &expr_datum);
+
+private:
+  // disallow copy
+  DISALLOW_COPY_AND_ASSIGN(ObExprVecVector);
+};
+} // namespace sql
+} // namespace oceanbase
+#endif /* OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_VECTOR_H_ */
diff --git a/src/sql/engine/expr/ob_expr_vec_vid.cpp b/src/sql/engine/expr/ob_expr_vec_vid.cpp
new file mode 100644
index 0000000000..7eabd75f2f
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_vec_vid.cpp
@@ -0,0 +1,100 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX COMMON
+
+#include "sql/engine/expr/ob_expr_vec_vid.h"
+#include "sql/engine/ob_exec_context.h"
+#include "share/ob_tablet_autoincrement_service.h"
+
+namespace oceanbase
+{
+using namespace common;
+namespace sql
+{
+// Registers the operator as T_FUN_SYS_VEC_VID / N_VEC_VID, accepting zero or
+// one argument (ZERO_OR_ONE), and switches off charset conversion.
+ObExprVecVid::ObExprVecVid(ObIAllocator &allocator)
+    : ObFuncExprOperator(allocator, T_FUN_SYS_VEC_VID, N_VEC_VID, ZERO_OR_ONE,
+                         VALID_FOR_GENERATED_COL, NOT_ROW_DIMENSION)
+{
+  need_charset_convert_ = false;
+}
+
+// Deduces the result type: always an integer, whatever the arguments are.
+int ObExprVecVid::calc_result_typeN(ObExprResType &type,
+                                    ObExprResType *types,
+                                    int64_t param_num,
+                                    ObExprTypeCtx &type_ctx) const
+{
+  int ret = OB_SUCCESS;
+  UNUSEDx(param_num, types, type_ctx);
+  type.set_int();
+  return ret;
+}
+
+// This evaluation entry point is not implemented for this expression;
+// it unconditionally reports OB_NOT_SUPPORTED.
+int ObExprVecVid::calc_resultN(ObObj &result,
+                               const ObObj *objs_array,
+                               int64_t param_num,
+                               ObExprCtx &expr_ctx) const
+{
+  UNUSEDx(result, objs_array, param_num, expr_ctx);
+  return OB_NOT_SUPPORTED;
+}
+
+// Binds generate_vec_id as the evaluation function. The runtime expression
+// must have zero or one argument; with one argument the argument array must
+// be present.
+int ObExprVecVid::cg_expr(
+    ObExprCGCtx &cg_ctx,
+    const ObRawExpr &raw_expr,
+    ObExpr &rt_expr) const
+{
+  int ret = OB_SUCCESS;
+  UNUSEDx(cg_ctx, raw_expr);
+  if (OB_UNLIKELY(rt_expr.arg_cnt_ != 1 && rt_expr.arg_cnt_ != 0)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected param count", K(ret), K(rt_expr.arg_cnt_), KP(rt_expr.args_), K(rt_expr.type_));
+  } else if (OB_UNLIKELY(rt_expr.arg_cnt_ == 1 && OB_ISNULL(rt_expr.args_))) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected error, rt_expr.args_ is nullptr", K(ret), K(rt_expr.arg_cnt_), KP(rt_expr.args_), K(rt_expr.type_));
+  } else {
+    rt_expr.eval_func_ = generate_vec_id;
+  }
+  return ret;
+}
+
+// Evaluation function.
+//  - No argument: expr_datum is left untouched and only a debug line is logged.
+//  - One argument: the argument is used to compute a partition id and tablet
+//    id, and the next auto-increment sequence value of that tablet becomes the
+//    integer result.
+/*static*/ int ObExprVecVid::generate_vec_id(
+    const ObExpr &raw_ctx,
+    ObEvalCtx &eval_ctx,
+    ObDatum &expr_datum)
+{
+  int ret = OB_SUCCESS;
+  if (raw_ctx.arg_cnt_ == 0) {
+    LOG_DEBUG("[vec index debug]succeed to genearte empty vid", KP(&raw_ctx), K(raw_ctx), K(expr_datum), K(eval_ctx));
+  } else if (OB_UNLIKELY(1 != raw_ctx.arg_cnt_) || OB_ISNULL(raw_ctx.args_)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), K(raw_ctx), KP(raw_ctx.args_));
+  } else if (OB_ISNULL(raw_ctx.args_[0])) {
+    // Guard the argument expression before handing it to the helper below.
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("calc part id expr is null", K(ret), K(raw_ctx));
+  } else {
+    ObExpr *calc_part_id_expr = raw_ctx.args_[0];
+    ObObjectID partition_id = OB_INVALID_ID;
+    ObTabletID tablet_id;
+    if (OB_FAIL(ObExprCalcPartitionBase::calc_part_and_tablet_id(calc_part_id_expr, eval_ctx, partition_id, tablet_id))) {
+      LOG_WARN("calc part and tablet id by expr failed", K(ret));
+    } else {
+      share::ObTabletAutoincrementService &auto_inc = share::ObTabletAutoincrementService::get_instance();
+      uint64_t seq_id = 0;
+      if (OB_FAIL(auto_inc.get_autoinc_seq(MTL_ID(), tablet_id, seq_id))) {
+        LOG_WARN("fail to get tablet autoinc seq", K(ret), K(tablet_id));
+      } else {
+        expr_datum.set_int(static_cast<int64_t>(seq_id));
+        // NOTE(review): logged at debug level because this line is reached
+        // once per generated id; confirm nothing relies on an INFO record.
+        LOG_DEBUG("succeed to genearte vector id", K(tablet_id), K(seq_id));
+      }
+    }
+  }
+  return ret;
+}
+
+
+} // namespace sql
+} // namespace oceanbase
diff --git a/src/sql/engine/expr/ob_expr_vec_vid.h b/src/sql/engine/expr/ob_expr_vec_vid.h
new file mode 100644
index 0000000000..18e81d0513
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_vec_vid.h
@@ -0,0 +1,49 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_VID_H_
+#define OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_VID_H_
+#include "sql/engine/expr/ob_expr_operator.h"
+
+namespace oceanbase
+{
+namespace sql
+{
+// Function expression operator for the vector-index "vid" expression.
+class ObExprVecVid : public ObFuncExprOperator
+{
+public:
+  explicit ObExprVecVid(common::ObIAllocator &alloc);
+  virtual ~ObExprVecVid() {}
+
+  // Result-type deduction for a variable number of arguments.
+  int calc_result_typeN(ObExprResType &type, ObExprResType *types, int64_t param_num,
+                        common::ObExprTypeCtx &type_ctx) const override;
+
+  // Object-based evaluation entry point.
+  int calc_resultN(common::ObObj &result, const common::ObObj *objs_array, int64_t param_num,
+                   common::ObExprCtx &expr_ctx) const override;
+
+  // Casts performed for this expression use CM_NULL_ON_WARN.
+  common::ObCastMode get_cast_mode() const override { return CM_NULL_ON_WARN; }
+
+  // Code generation hook: installs the evaluation function on rt_expr.
+  int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override;
+
+  // Evaluation function installed by cg_expr().
+  static int generate_vec_id(const ObExpr &raw_ctx, ObEvalCtx &eval_ctx, ObDatum &expr_datum);
+
+private:
+  // disallow copy
+  DISALLOW_COPY_AND_ASSIGN(ObExprVecVid);
+};
+} // namespace sql
+} // namespace oceanbase
+#endif /* OCEANBASE_SRC_SQL_ENGINE_EXPR_OB_EXPR_VEC_VID_H_ */
diff --git a/src/sql/engine/expr/ob_expr_vector.cpp b/src/sql/engine/expr/ob_expr_vector.cpp
new file mode 100644
index 0000000000..30db6958ee
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_vector.cpp
@@ -0,0 +1,341 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan
+ * PubL v2. You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+ * KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+ * NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the
+ * Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX SQL_ENG
+#include "sql/engine/expr/ob_expr_vector.h"
+#include "sql/engine/ob_subschema_ctx.h"
+#include "sql/engine/ob_exec_context.h"
+#include "sql/engine/expr/ob_expr_lob_utils.h"
+#include "sql/engine/expr/ob_array_expr_utils.h"
+#include "sql/engine/expr/ob_array_cast.h"
+#include "share/vector_type/ob_vector_l2_distance.h"
+#include "share/vector_type/ob_vector_cosine_distance.h"
+#include "share/vector_type/ob_vector_ip_distance.h"
+#include "share/vector_type/ob_vector_norm.h"
+#include "share/vector_type/ob_vector_l1_distance.h"
+
+namespace oceanbase
+{
+namespace sql
+{
+// Forwards every argument to ObFuncExprOperator; all vector expressions are
+// registered with VALID_FOR_GENERATED_COL.
+ObExprVector::ObExprVector(ObIAllocator &alloc,
+                           ObExprOperatorType type,
+                           const char *name,
+                           int32_t param_num,
+                           int32_t dimension)
+    : ObFuncExprOperator(alloc, type, name, param_num, VALID_FOR_GENERATED_COL, dimension)
+{
+}
+
+// Type deduction shared by the two-operand vector expressions: lets
+// ObArrayExprUtils settle the cast types of both operands, then declares a
+// DOUBLE result.
+// [a,b,c,...] is array type, there is no dim_cnt_ in ObCollectionArrayType
+int ObExprVector::calc_result_type2(
+    ObExprResType &type,
+    ObExprResType &type1,
+    ObExprResType &type2,
+    common::ObExprTypeCtx &type_ctx) const
+{
+  int ret = OB_SUCCESS;
+  uint16_t unused_id = UINT16_MAX;
+  if (OB_FAIL(ObArrayExprUtils::calc_cast_type2(type1, type2, type_ctx, unused_id))) {
+    LOG_WARN("failed to calc cast type", K(ret), K(type1));
+  } else {
+    type.set_type(ObDoubleType);
+    type.set_calc_type(ObDoubleType);
+  }
+  return ret;
+}
+
+// Type deduction shared by the one-operand vector expressions: lets
+// ObArrayExprUtils settle the operand's cast type, then declares a DOUBLE
+// result.
+int ObExprVector::calc_result_type1(
+    ObExprResType &type,
+    ObExprResType &type1,
+    common::ObExprTypeCtx &type_ctx) const
+{
+  int ret = OB_SUCCESS;
+  if (OB_FAIL(ObArrayExprUtils::calc_cast_type(type1, type_ctx))) {
+    LOG_WARN("failed to calc cast type", K(ret), K(type1));
+  } else {
+    type.set_type(ObDoubleType);
+    type.set_calc_type(ObDoubleType);
+  }
+  return ret;
+}
+
+// Generic distance expression: two vectors plus an optional third argument
+// that selects the metric (TWO_OR_THREE).
+ObExprVectorDistance::ObExprVectorDistance(ObIAllocator &alloc)
+    : ObExprVector(alloc, T_FUN_SYS_VECTOR_DISTANCE, N_VECTOR_DISTANCE, TWO_OR_THREE, NOT_ROW_DIMENSION)
+{}
+
+// Constructor used by the fixed-metric subclasses.
+ObExprVectorDistance::ObExprVectorDistance(
+    ObIAllocator &alloc,
+    ObExprOperatorType type,
+    const char *name,
+    int32_t param_num,
+    int32_t dimension)
+    : ObExprVector(alloc, type, name, param_num, dimension)
+{}
+
+// Dispatch table indexed by ObVecDisType; the entry order must follow the enum
+// declaration. A nullptr slot means the metric has no implementation and is
+// reported as OB_NOT_SUPPORTED by calc_distance().
+ObExprVectorDistance::FuncPtrType ObExprVectorDistance::distance_funcs[] =
+{
+  ObVectorCosineDistance::cosine_distance_func, // COSINE
+  ObVectorIpDistance::ip_distance_func,         // DOT
+  ObVectorL2Distance::l2_distance_func,         // EUCLIDEAN
+  ObVectorL1Distance::l1_distance_func,         // MANHATTAN
+  ObVectorL2Distance::l2_square_func,           // EUCLIDEAN_SQUARED
+  nullptr,                                      // HAMMING
+};
+
+// Type deduction for 2 or 3 arguments. Only the first two (the vectors) take
+// part in type deduction; the optional third argument is read at evaluation
+// time as the metric selector.
+// Returns OB_ERR_PARAM_SIZE when the argument count is outside [2, 3].
+int ObExprVectorDistance::calc_result_typeN(
+    ObExprResType &type,
+    ObExprResType *types_stack,
+    int64_t param_num,
+    common::ObExprTypeCtx &type_ctx) const
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(param_num < 2 || param_num > 3)) {
+    // types_stack[0] and types_stack[1] are read below, so fewer than two
+    // arguments must be rejected as well.
+    ObString func_name_(get_name());
+    ret = OB_ERR_PARAM_SIZE;
+    LOG_USER_ERROR(OB_ERR_PARAM_SIZE, func_name_.length(), func_name_.ptr());
+  } else if (OB_ISNULL(types_stack)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("types stack is null", K(ret), K(param_num));
+  } else if (OB_FAIL(calc_result_type2(type, types_stack[0], types_stack[1], type_ctx))) {
+    LOG_WARN("failed to calc result type", K(ret));
+  }
+  return ret;
+}
+
+int ObExprVectorDistance::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
+                                  ObExpr &rt_expr) const
+{
+  int ret = OB_SUCCESS;
+  rt_expr.eval_func_ = ObExprVectorDistance::calc_distance;
+  return ret;
+}
+
+// Evaluation function of the generic distance expression. With three
+// arguments the third one is evaluated as the integer metric id; with two
+// arguments the metric defaults to EUCLIDEAN.
+// Returns OB_INVALID_ARGUMENT when the metric is NULL or outside
+// [COSINE, MAX_TYPE).
+int ObExprVectorDistance::calc_distance(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
+{
+  int ret = OB_SUCCESS;
+  ObVecDisType dis_type = ObVecDisType::EUCLIDEAN; // default metric
+  if (3 == expr.arg_cnt_) {
+    ObDatum *datum = NULL;
+    if (OB_FAIL(expr.args_[2]->eval(ctx, datum))) {
+      LOG_WARN("eval failed", K(ret));
+    } else if (OB_ISNULL(datum)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("get null datum", K(ret));
+    } else if (datum->is_null()) {
+      ret = OB_INVALID_ARGUMENT;
+      LOG_WARN("invalid arg", K(ret), K(*datum));
+    } else {
+      // Range-check the raw integer before converting it to the enum.
+      const int64_t metric = datum->get_int();
+      if (OB_UNLIKELY(metric < ObVecDisType::COSINE || metric >= ObVecDisType::MAX_TYPE)) {
+        ret = OB_INVALID_ARGUMENT;
+        LOG_WARN("invalid distance metric", K(ret), K(metric));
+      } else {
+        dis_type = static_cast<ObVecDisType>(metric);
+      }
+    }
+  }
+  if (FAILEDx(calc_distance(expr, ctx, res_datum, dis_type))) {
+    LOG_WARN("failed to calc distance", K(ret), K(dis_type));
+  }
+  return ret;
+}
+
+// Computes the distance between the first two arguments with the given metric.
+//  - A NULL vector argument produces a NULL result.
+//  - Vectors of different size -> OB_ERR_INVALID_VECTOR_DIM.
+//  - A vector containing NULL elements -> OB_ERR_NULL_VALUE.
+//  - A metric without an implementation -> OB_NOT_SUPPORTED.
+//  - OB_ERR_NULL_VALUE coming back from the metric function is turned into a
+//    NULL result instead of an error.
+int ObExprVectorDistance::calc_distance(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum, ObVecDisType dis_type)
+{
+  int ret = OB_SUCCESS;
+  ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx);
+  common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator();
+  ObIArrayType *arr_l = NULL;
+  ObIArrayType *arr_r = NULL;
+  bool contain_null = false;
+  if (dis_type < ObVecDisType::COSINE || dis_type >= ObVecDisType::MAX_TYPE) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpect distance type", K(ret), K(dis_type));
+  } else if (OB_FAIL(ObArrayExprUtils::get_type_vector(*(expr.args_[0]), ctx, tmp_allocator, arr_l, contain_null))) {
+    LOG_WARN("failed to get vector", K(ret), K(*expr.args_[0]));
+  } else if (OB_FAIL(ObArrayExprUtils::get_type_vector(*(expr.args_[1]), ctx, tmp_allocator, arr_r, contain_null))) {
+    LOG_WARN("failed to get vector", K(ret), K(*expr.args_[1]));
+  } else if (contain_null) {
+    res_datum.set_null();
+  } else if (OB_ISNULL(arr_l) || OB_ISNULL(arr_r)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected nullptr", K(ret), K(arr_l), K(arr_r));
+  } else if (OB_UNLIKELY(arr_l->size() != arr_r->size())) {
+    ret = OB_ERR_INVALID_VECTOR_DIM;
+    LOG_WARN("check array validty failed", K(ret), K(arr_l->size()), K(arr_r->size()));
+  } else if (arr_l->contain_null() || arr_r->contain_null()) {
+    ret = OB_ERR_NULL_VALUE;
+    LOG_WARN("array with null can't calculate vector distance", K(ret));
+  } else {
+    double distance = 0.0;
+    const float *data_l = reinterpret_cast<const float *>(arr_l->get_data());
+    const float *data_r = reinterpret_cast<const float *>(arr_r->get_data());
+    const uint32_t size = arr_l->size();
+    if (distance_funcs[dis_type] == nullptr) {
+      ret = OB_NOT_SUPPORTED;
+      LOG_WARN("not support", K(ret), K(dis_type));
+    } else if (OB_FAIL(distance_funcs[dis_type](data_l, data_r, size, distance))) {
+      if (OB_ERR_NULL_VALUE == ret) {
+        res_datum.set_null();
+        ret = OB_SUCCESS; // ignore
+      } else {
+        LOG_WARN("failed to calc distance", K(ret), K(dis_type));
+      }
+    } else {
+      res_datum.set_double(distance);
+    }
+  }
+  return ret;
+}
+
+// ---- L1 (MANHATTAN) distance -----------------------------------------------
+ObExprVectorL1Distance::ObExprVectorL1Distance(ObIAllocator &alloc)
+    : ObExprVectorDistance(alloc, T_FUN_SYS_L1_DISTANCE, N_VECTOR_L1_DISTANCE, 2, NOT_ROW_DIMENSION) {}
+
+int ObExprVectorL1Distance::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
+                                    ObExpr &rt_expr) const
+{
+  int ret = OB_SUCCESS;
+  rt_expr.eval_func_ = ObExprVectorL1Distance::calc_l1_distance;
+  return ret;
+}
+
+int ObExprVectorL1Distance::calc_l1_distance(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
+{
+  return ObExprVectorDistance::calc_distance(expr, ctx, res_datum, ObVecDisType::MANHATTAN);
+}
+
+// ---- L2 (EUCLIDEAN) distance -----------------------------------------------
+ObExprVectorL2Distance::ObExprVectorL2Distance(ObIAllocator &alloc)
+    : ObExprVectorDistance(alloc, T_FUN_SYS_L2_DISTANCE, N_VECTOR_L2_DISTANCE, 2, NOT_ROW_DIMENSION) {}
+
+int ObExprVectorL2Distance::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
+                                    ObExpr &rt_expr) const
+{
+  int ret = OB_SUCCESS;
+  rt_expr.eval_func_ = ObExprVectorL2Distance::calc_l2_distance;
+  return ret;
+}
+
+int ObExprVectorL2Distance::calc_l2_distance(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
+{
+  return ObExprVectorDistance::calc_distance(expr, ctx, res_datum, ObVecDisType::EUCLIDEAN);
+}
+
+// ---- Cosine distance -------------------------------------------------------
+ObExprVectorCosineDistance::ObExprVectorCosineDistance(ObIAllocator &alloc)
+    : ObExprVectorDistance(alloc, T_FUN_SYS_COSINE_DISTANCE, N_VECTOR_COS_DISTANCE, 2, NOT_ROW_DIMENSION) {}
+
+int ObExprVectorCosineDistance::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
+                                        ObExpr &rt_expr) const
+{
+  int ret = OB_SUCCESS;
+  rt_expr.eval_func_ = ObExprVectorCosineDistance::calc_cosine_distance;
+  return ret;
+}
+
+int ObExprVectorCosineDistance::calc_cosine_distance(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
+{
+  return ObExprVectorDistance::calc_distance(expr, ctx, res_datum, ObVecDisType::COSINE);
+}
+
+// ---- Inner product (DOT) ---------------------------------------------------
+ObExprVectorIPDistance::ObExprVectorIPDistance(ObIAllocator &alloc)
+    : ObExprVectorDistance(alloc, T_FUN_SYS_INNER_PRODUCT, N_VECTOR_INNER_PRODUCT, 2, NOT_ROW_DIMENSION) {}
+
+int ObExprVectorIPDistance::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
+                                    ObExpr &rt_expr) const
+{
+  int ret = OB_SUCCESS;
+  rt_expr.eval_func_ = ObExprVectorIPDistance::calc_inner_product;
+  return ret;
+}
+
+int ObExprVectorIPDistance::calc_inner_product(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
+{
+  return ObExprVectorDistance::calc_distance(expr, ctx, res_datum, ObVecDisType::DOT);
+}
+
+// ---- Vector dimension count ------------------------------------------------
+ObExprVectorDims::ObExprVectorDims(ObIAllocator &alloc)
+    : ObExprVector(alloc, T_FUN_SYS_VECTOR_DIMS, N_VECTOR_DIMS, 1, NOT_ROW_DIMENSION) {}
+
+// Same operand handling as ObExprVector::calc_result_type1, but the result is
+// an INT rather than a DOUBLE.
+int ObExprVectorDims::calc_result_type1(
+    ObExprResType &type,
+    ObExprResType &type1,
+    common::ObExprTypeCtx &type_ctx) const
+{
+  int ret = OB_SUCCESS;
+  if (OB_FAIL(ObArrayExprUtils::calc_cast_type(type1, type_ctx))) {
+    LOG_WARN("failed to calc cast type", K(ret), K(type1));
+  } else {
+    type.set_type(ObIntType);
+    type.set_calc_type(ObIntType);
+  }
+  return ret;
+}
+
+int ObExprVectorDims::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
+                              ObExpr &rt_expr) const
+{
+  int ret = OB_SUCCESS;
+  rt_expr.eval_func_ = ObExprVectorDims::calc_dims;
+  return ret;
+}
+
+// Returns the element count of the vector argument. A NULL argument produces
+// a NULL result; a vector containing NULL elements is rejected with
+// OB_ERR_NULL_VALUE.
+int ObExprVectorDims::calc_dims(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
+{
+  int ret = OB_SUCCESS;
+  ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx);
+  common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator();
+  ObIArrayType *arr = NULL;
+  bool contain_null = false;
+  if (OB_FAIL(ObArrayExprUtils::get_type_vector(*(expr.args_[0]), ctx, tmp_allocator, arr, contain_null))) {
+    LOG_WARN("failed to get vector", K(ret), K(*expr.args_[0]));
+  } else if (contain_null) {
+    res_datum.set_null();
+  } else if (OB_ISNULL(arr)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected nullptr", K(ret), K(arr));
+  } else if (arr->contain_null()) {
+    ret = OB_ERR_NULL_VALUE;
+    LOG_WARN("array with null can't calculate vector dims", K(ret));
+  } else {
+    res_datum.set_int(arr->size());
+  }
+  return ret;
+}
+
+// ---- Vector norm -----------------------------------------------------------
+ObExprVectorNorm::ObExprVectorNorm(ObIAllocator &alloc)
+    : ObExprVector(alloc, T_FUN_SYS_VECTOR_NORM, N_VECTOR_NORM, 1, NOT_ROW_DIMENSION) {}
+
+int ObExprVectorNorm::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
+                              ObExpr &rt_expr) const
+{
+  int ret = OB_SUCCESS;
+  rt_expr.eval_func_ = ObExprVectorNorm::calc_norm;
+  return ret;
+}
+
+// Computes the norm of the vector argument through
+// ObVectorNorm::vector_norm_func. A NULL argument produces a NULL result; a
+// vector containing NULL elements is rejected with OB_ERR_NULL_VALUE.
+int ObExprVectorNorm::calc_norm(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
+{
+  int ret = OB_SUCCESS;
+  ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx);
+  common::ObArenaAllocator &tmp_allocator = tmp_alloc_g.get_allocator();
+  ObIArrayType *arr = NULL;
+  bool contain_null = false;
+  if (OB_FAIL(ObArrayExprUtils::get_type_vector(*(expr.args_[0]), ctx, tmp_allocator, arr, contain_null))) {
+    LOG_WARN("failed to get vector", K(ret), K(*expr.args_[0]));
+  } else if (contain_null) {
+    res_datum.set_null();
+  } else if (OB_ISNULL(arr)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected nullptr", K(ret), K(arr));
+  } else if (arr->contain_null()) {
+    ret = OB_ERR_NULL_VALUE;
+    LOG_WARN("array with null can't calculate vector norm", K(ret));
+  } else {
+    double norm = 0.0;
+    const float *data = reinterpret_cast<const float *>(arr->get_data());
+    if (OB_FAIL(ObVectorNorm::vector_norm_func(data, arr->size(), norm))) {
+      LOG_WARN("failed to calc vector norm", K(ret));
+    } else {
+      res_datum.set_double(norm);
+    }
+  }
+  return ret;
+}
+
+} // sql
+} // oceanbase
\ No newline at end of file
diff --git a/src/sql/engine/expr/ob_expr_vector.h b/src/sql/engine/expr/ob_expr_vector.h
new file mode 100644
index 0000000000..2b27486d04
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_vector.h
@@ -0,0 +1,201 @@
+/**
+ * Copyright (c) 2024 OceanBase
+ * OceanBase is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan
+ * PubL v2.
You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
+ * KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+ * NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the
+ * Mulan PubL v2 for more details.
+ */
+
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ * This file contains implementation for array.
+ */
+
+#ifndef OCEANBASE_SQL_OB_EXPR_VECTOR
+#define OCEANBASE_SQL_OB_EXPR_VECTOR
+
+#include "sql/engine/expr/ob_expr_operator.h"
+#include "lib/udt/ob_array_type.h"
+
+
+namespace oceanbase
+{
+namespace sql
+{
+// Common base of the vector expressions declared in this header. It supplies
+// the shared one- and two-operand result-type deduction.
+class ObExprVector : public ObFuncExprOperator
+{
+public:
+  // Per-operand cast bookkeeping; every field starts in its "unset" state.
+  struct VectorCastInfo
+  {
+    VectorCastInfo()
+        : is_vector_(false),
+          need_cast_(false),
+          subschema_id_(UINT16_MAX),
+          dim_cnt_(0)
+    {}
+    bool is_vector_;
+    bool need_cast_;
+    uint16_t subschema_id_;
+    uint16_t dim_cnt_;
+  };
+
+public:
+  explicit ObExprVector(common::ObIAllocator &alloc, ObExprOperatorType type,
+                        const char *name, int32_t param_num, int32_t dimension);
+  virtual ~ObExprVector() {}
+
+  // Result-type deduction for two operands.
+  int calc_result_type2(ObExprResType &type, ObExprResType &type1, ObExprResType &type2,
+                        common::ObExprTypeCtx &type_ctx) const override;
+  // Result-type deduction for one operand.
+  int calc_result_type1(ObExprResType &type, ObExprResType &type1,
+                        common::ObExprTypeCtx &type_ctx) const override;
+
+private:
+  int collect_vector_cast_info(ObExprResType &type, ObExecContext &exec_ctx, VectorCastInfo &info) const;
+  DISALLOW_COPY_AND_ASSIGN(ObExprVector);
+};
+
+// Generic distance expression; also the base of the fixed-metric variants.
+class ObExprVectorDistance : public ObExprVector
+{
+public:
+  // Metric selector. The enumerator values index distance_funcs[].
+  enum ObVecDisType
+  {
+    COSINE = 0,
+    DOT,               // inner product
+    EUCLIDEAN,         // L2
+    MANHATTAN,         // L1
+    EUCLIDEAN_SQUARED, // L2_SQUARED
+    HAMMING,
+    MAX_TYPE,
+  };
+  // Signature of a metric implementation: two float arrays of length len,
+  // result written to distance.
+  using FuncPtrType = int (*)(const float* a, const float* b, const int64_t len, double& distance);
+  static FuncPtrType distance_funcs[];
+
+public:
+  explicit ObExprVectorDistance(common::ObIAllocator &alloc);
+  explicit ObExprVectorDistance(common::ObIAllocator &alloc, ObExprOperatorType type,
+                                const char *name, int32_t param_num, int32_t dimension);
+  virtual ~ObExprVectorDistance() {}
+
+  int calc_result_typeN(ObExprResType &type, ObExprResType *types_stack, int64_t param_num,
+                        common::ObExprTypeCtx &type_ctx) const override;
+  int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override;
+
+  // Evaluation function: metric taken from the optional third argument.
+  static int calc_distance(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum);
+  // Evaluation helper: metric supplied by the caller.
+  static int calc_distance(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum, ObVecDisType dis_type);
+
+private:
+  DISALLOW_COPY_AND_ASSIGN(ObExprVectorDistance);
+};
+
+// Distance expression fixed to the MANHATTAN metric.
+class ObExprVectorL1Distance : public ObExprVectorDistance
+{
+public:
+  explicit ObExprVectorL1Distance(common::ObIAllocator &alloc);
+  virtual ~ObExprVectorL1Distance() {}
+
+  int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override;
+
+  static int calc_l1_distance(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum);
+
+private:
+  DISALLOW_COPY_AND_ASSIGN(ObExprVectorL1Distance);
+};
+
+// Distance expression fixed to the EUCLIDEAN metric.
+class ObExprVectorL2Distance : public ObExprVectorDistance
+{
+public:
+  explicit ObExprVectorL2Distance(common::ObIAllocator &alloc);
+  virtual ~ObExprVectorL2Distance() {}
+
+  int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override;
+
+  static int calc_l2_distance(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum);
+
+private:
+  DISALLOW_COPY_AND_ASSIGN(ObExprVectorL2Distance);
+};
+
+// Distance expression fixed to the COSINE metric.
+class ObExprVectorCosineDistance : public ObExprVectorDistance
+{
+public:
+  explicit ObExprVectorCosineDistance(common::ObIAllocator &alloc);
+  virtual ~ObExprVectorCosineDistance() {}
+
+  int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override;
+
+  static int calc_cosine_distance(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum);
+
+private:
+  DISALLOW_COPY_AND_ASSIGN(ObExprVectorCosineDistance);
+};
+
+// Distance expression fixed to the DOT (inner product) metric.
+class ObExprVectorIPDistance : public ObExprVectorDistance
+{
+public:
+  explicit ObExprVectorIPDistance(common::ObIAllocator &alloc);
+  virtual ~ObExprVectorIPDistance() {}
+
+  int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override;
+
+  static int calc_inner_product(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum);
+
+private:
+  DISALLOW_COPY_AND_ASSIGN(ObExprVectorIPDistance);
+};
+
+// Expression returning the number of elements of a vector.
+class ObExprVectorDims : public ObExprVector
+{
+public:
+  explicit ObExprVectorDims(common::ObIAllocator &alloc);
+  virtual ~ObExprVectorDims() {}
+
+  int calc_result_type1(ObExprResType &type, ObExprResType &type1,
+                        common::ObExprTypeCtx &type_ctx) const override;
+  int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override;
+
+  static int calc_dims(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum);
+
+private:
+  DISALLOW_COPY_AND_ASSIGN(ObExprVectorDims);
+};
+
+// Expression returning the norm of a vector.
+class ObExprVectorNorm : public ObExprVector
+{
+public:
+  explicit ObExprVectorNorm(common::ObIAllocator &alloc);
+  virtual ~ObExprVectorNorm() {}
+
+  int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override;
+
+  static int calc_norm(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum);
+
+private:
+  DISALLOW_COPY_AND_ASSIGN(ObExprVectorNorm);
+};
+} // sql
+} // oceanbase
+#endif // 
OCEANBASE_SQL_OB_EXPR_VECTOR \ No newline at end of file diff --git a/src/sql/engine/expr/ob_expr_xml_func_helper.cpp b/src/sql/engine/expr/ob_expr_xml_func_helper.cpp index 333faf0a0b..36bf750494 100644 --- a/src/sql/engine/expr/ob_expr_xml_func_helper.cpp +++ b/src/sql/engine/expr/ob_expr_xml_func_helper.cpp @@ -1288,36 +1288,48 @@ int ObXMLExprHelper::process_sql_udt_results(common::ObObj& value, if (OB_FAIL(ret)) { } else { const uint16_t subschema_id = value.get_meta().get_subschema_id(); - ObSqlUDTMeta udt_meta; + ObSubSchemaValue sub_meta; if (OB_ISNULL(exec_context->get_physical_plan_ctx())) { // build temp subschema id - } else if (OB_FAIL(exec_context->get_sqludt_meta_by_subschema_id(subschema_id, udt_meta))) { + } else if (OB_FAIL(exec_context->get_sqludt_meta_by_subschema_id(subschema_id, sub_meta))) { LOG_WARN("failed to get udt meta", K(ret), K(subschema_id)); } if (OB_FAIL(ret)) { - } else if (!ObObjUDTUtil::ob_is_supported_sql_udt(udt_meta.udt_id_)) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("not supported to get udt meta", K(ret), K(udt_meta.udt_id_)); - } else if (!is_ps_protocol) { - ObSqlUDT sql_udt; - sql_udt.set_udt_meta(udt_meta); + } else if (sub_meta.type_ == ObSubSchemaType::OB_SUBSCHEMA_COLLECTION_TYPE) { + // array + ObSqlCollectionInfo *coll_meta = reinterpret_cast(sub_meta.value_); ObString res_str; - if (OB_FAIL(sql::ObSqlUdtUtils::convert_sql_udt_to_string(value, allocator, exec_context, - sql_udt, res_str))) { + if (OB_FAIL(sql::ObSqlUdtUtils::convert_collection_to_string(value, *coll_meta, allocator, res_str))) { LOG_WARN("failed to convert udt to string", K(ret), K(subschema_id)); } else { value.set_udt_value(res_str.ptr(), res_str.length()); } } else { - ObString udt_data = value.get_string(); - ObObj result; - if (OB_FAIL(ObSqlUdtUtils::cast_sql_record_to_pl_record(exec_context, - result, - udt_data, - udt_meta))) { - LOG_WARN("failed to cast sql collection to pl collection", K(ret), K(udt_meta.udt_id_)); + ObSqlUDTMeta udt_meta = 
*(reinterpret_cast(sub_meta.value_)); + if (!ObObjUDTUtil::ob_is_supported_sql_udt(udt_meta.udt_id_)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported to get udt meta", K(ret), K(udt_meta.udt_id_)); + } else if (!is_ps_protocol) { + ObSqlUDT sql_udt; + sql_udt.set_udt_meta(udt_meta); + ObString res_str; + if (OB_FAIL(sql::ObSqlUdtUtils::convert_sql_udt_to_string(value, allocator, exec_context, + sql_udt, res_str))) { + LOG_WARN("failed to convert udt to string", K(ret), K(subschema_id)); + } else { + value.set_udt_value(res_str.ptr(), res_str.length()); + } } else { - value = result; + ObString udt_data = value.get_string(); + ObObj result; + if (OB_FAIL(ObSqlUdtUtils::cast_sql_record_to_pl_record(exec_context, + result, + udt_data, + udt_meta))) { + LOG_WARN("failed to cast sql collection to pl collection", K(ret), K(udt_meta.udt_id_)); + } else { + value = result; + } } } } diff --git a/src/sql/engine/join/hash_join/ob_hash_join_struct.cpp b/src/sql/engine/join/hash_join/ob_hash_join_struct.cpp index eaddd6b73c..bf8ad71a33 100644 --- a/src/sql/engine/join/hash_join/ob_hash_join_struct.cpp +++ b/src/sql/engine/join/hash_join/ob_hash_join_struct.cpp @@ -13,6 +13,7 @@ #define USING_LOG_PREFIX SQL_ENG #include "sql/engine/join/hash_join/ob_hash_join_struct.h" +#include "sql/engine/expr/ob_array_expr_utils.h" namespace oceanbase { @@ -29,6 +30,13 @@ int ObHJStoredRow::convert_one_row_to_exprs(const ExprFixedArray &exprs, ObExpr *expr = exprs.at(i); if (OB_UNLIKELY(expr->is_const_expr())) { continue; + } else if (expr->is_nested_expr() && !is_uniform_format(expr->get_format(eval_ctx))) { + if (OB_FAIL(ObArrayExprUtils::nested_expr_from_rows(*expr, eval_ctx, row_meta, + reinterpret_cast(&row), 1, i, &batch_idx))) { + LOG_WARN("fail to do nested expr from rows", K(ret)); + } else { + exprs.at(i)->set_evaluated_projected(eval_ctx); + } } else { ObIVector *vec = expr->get_vector(eval_ctx); if (OB_FAIL(vec->from_row(row_meta, row, batch_idx, i))) { @@ -54,7 +62,15 @@ 
int ObHJStoredRow::attach_rows(const ObExprPtrIArray &exprs, ObExpr *expr = exprs.at(col_idx); if (OB_FAIL(expr->init_vector_default(ctx, selector[size - 1] + 1))) { LOG_WARN("fail to init vector", K(ret)); - } else { + } else if (expr->is_nested_expr() && !is_uniform_format(expr->get_format(ctx))) { + if (OB_FAIL(ObArrayExprUtils::nested_expr_from_rows(*expr, ctx, row_meta, + reinterpret_cast(srows), size, col_idx, + reinterpret_cast(selector)))) { + LOG_WARN("fail to do nested expr from rows", K(ret)); + } else { + expr->set_evaluated_projected(ctx); + } + } else { ObIVector *vec = expr->get_vector(ctx); if (VEC_UNIFORM_CONST != vec->get_format()) { ret = vec->from_rows(row_meta, diff --git a/src/sql/engine/join/ob_join_vec_op.cpp b/src/sql/engine/join/ob_join_vec_op.cpp index 732f4df54e..cbe1f0317e 100644 --- a/src/sql/engine/join/ob_join_vec_op.cpp +++ b/src/sql/engine/join/ob_join_vec_op.cpp @@ -49,6 +49,8 @@ int ObJoinVecOp::blank_row_batch(const ExprFixedArray &exprs, int64_t batch_size ObIVector *vec = exprs.at(col_idx)->get_vector(eval_ctx_); if (OB_UNLIKELY(VEC_UNIFORM_CONST == exprs.at(col_idx)->get_format(eval_ctx_))) { reinterpret_cast *>(vec)->set_null(0); + } else if (VEC_UNIFORM == exprs.at(col_idx)->get_format(eval_ctx_)) { + reinterpret_cast *>(vec)->set_all_null(batch_size); } else { reinterpret_cast(vec)->get_nulls()->set_all(batch_size); reinterpret_cast(vec)->set_has_null(); diff --git a/src/sql/engine/ob_exec_context.cpp b/src/sql/engine/ob_exec_context.cpp index 5291d8d379..0ff8457578 100644 --- a/src/sql/engine/ob_exec_context.cpp +++ b/src/sql/engine/ob_exec_context.cpp @@ -1184,6 +1184,21 @@ int ObExecContext::get_sqludt_meta_by_subschema_id(uint16_t subschema_id, ObSqlU return ret; } +int ObExecContext::get_sqludt_meta_by_subschema_id(uint16_t subschema_id, ObSubSchemaValue &sub_meta) +{ + int ret = OB_SUCCESS; + if (ob_is_reserved_subschema_id(subschema_id)) { + ret = OB_ERR_UNEXPECTED; + SQL_ENG_LOG(WARN, "unexpected subschema id", 
K(ret), K(subschema_id), K(lbt())); + } else if (OB_ISNULL(phy_plan_ctx_)) { + ret = OB_NOT_INIT; + SQL_ENG_LOG(WARN, "not phyical plan ctx for subschema mapping", K(ret), K(lbt())); + } else { + ret = phy_plan_ctx_->get_sqludt_meta_by_subschema_id(subschema_id, sub_meta); + } + return ret; +} + int ObExecContext::get_subschema_id_by_udt_id(uint64_t udt_type_id, uint16_t &subschema_id, share::schema::ObSchemaGetterGuard *schema_guard) @@ -1201,6 +1216,31 @@ int ObExecContext::get_subschema_id_by_udt_id(uint64_t udt_type_id, return ret; } +int ObExecContext::get_subschema_id_by_collection_elem_type(ObNestedType coll_type, + const ObDataType &elem_type, + uint16_t &subschema_id) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(phy_plan_ctx_)) { + ret = OB_NOT_INIT; + SQL_ENG_LOG(WARN, "not phyical plan ctx for reverse mapping", K(ret), K(lbt())); + } else { + ret = phy_plan_ctx_->get_subschema_id_by_collection_elem_type(coll_type, elem_type, subschema_id); + } + return ret; +} + +int ObExecContext::get_subschema_id_by_type_string(const ObString &type_string, uint16_t &subschema_id) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(phy_plan_ctx_)) { + ret = OB_NOT_INIT; + SQL_ENG_LOG(WARN, "not phyical plan ctx for reverse mapping", K(ret), K(lbt())); + } else { + ret = phy_plan_ctx_->get_subschema_id_by_type_string(type_string, subschema_id); + } + return ret; +} int ObExecContext::get_lob_access_ctx(ObLobAccessCtx *&lob_access_ctx) { diff --git a/src/sql/engine/ob_exec_context.h b/src/sql/engine/ob_exec_context.h index f87b5b938e..7debde00e1 100644 --- a/src/sql/engine/ob_exec_context.h +++ b/src/sql/engine/ob_exec_context.h @@ -29,6 +29,7 @@ #include "sql/engine/cmd/ob_table_direct_insert_ctx.h" #include "pl/ob_pl_package_guard.h" #include "lib/udt/ob_udt_type.h" +#include "lib/udt/ob_collection_type.h" #define GET_PHY_PLAN_CTX(ctx) ((ctx).get_physical_plan_ctx()) #define GET_MY_SESSION(ctx) ((ctx).get_my_session()) @@ -504,10 +505,14 @@ public: int get_errcode() const { return 
ATOMIC_LOAD(&errcode_); } hash::ObHashMap &get_dblink_snapshot_map() { return dblink_snapshot_map_; } int get_sqludt_meta_by_subschema_id(uint16_t subschema_id, ObSqlUDTMeta &udt_meta); + int get_sqludt_meta_by_subschema_id(uint16_t subschema_id, ObSubSchemaValue &sub_meta); int get_subschema_id_by_udt_id(uint64_t udt_type_id, uint16_t &subschema_id, share::schema::ObSchemaGetterGuard *schema_guard = NULL); - + int get_subschema_id_by_collection_elem_type(ObNestedType coll_type, + const ObDataType &elem_type, + uint16_t &subschema_id); + int get_subschema_id_by_type_string(const ObString &type_string, uint16_t &subschema_id); ObExecFeedbackInfo &get_feedback_info() { return fb_info_; }; inline void set_cur_rownum(int64_t cur_rownum) { user_logging_ctx_.row_num_ = cur_rownum; } inline int64_t get_cur_rownum() const { return user_logging_ctx_.row_num_; } diff --git a/src/sql/engine/ob_operator.cpp b/src/sql/engine/ob_operator.cpp index eee7605bc9..f1e8f0fda7 100644 --- a/src/sql/engine/ob_operator.cpp +++ b/src/sql/engine/ob_operator.cpp @@ -616,6 +616,74 @@ int ObOperator::output_expr_sanity_check() return ret; } +int ObOperator::output_nested_expr_sanity_check_batch(const ObExpr &expr) +{ + int ret = OB_SUCCESS; + for (uint32_t i = 0; OB_SUCC(ret) && i < expr.attrs_cnt_; ++i) { + if (OB_FAIL(output_expr_sanity_check_batch_inner(*expr.attrs_[i]))) { + LOG_WARN("check nested expr sanity failed", K(ret)); + } + } + return ret; +} + +int ObOperator::output_expr_sanity_check_batch_inner(const ObExpr &expr) +{ + int ret = OB_SUCCESS; + VectorFormat vec_fmt = expr.get_format(eval_ctx_); + ObIVector *ivec = expr.get_vector(eval_ctx_); + if (vec_fmt == VEC_UNIFORM || vec_fmt == VEC_UNIFORM_CONST) { + ObUniformBase *uni_data = static_cast(ivec); + if (vec_fmt == VEC_UNIFORM_CONST) { + if (brs_.skip_->accumulate_bit_cnt(brs_.size_) < brs_.size_) { + ObDatum &datum = uni_data->get_datums()[0]; + SANITY_CHECK_RANGE(datum.ptr_, datum.len_); + } + } else { + ObDatum *datums = 
uni_data->get_datums(); + for (int j = 0; j < brs_.size_; j++) { + if (!brs_.skip_->at(j)) { + SANITY_CHECK_RANGE(datums[j].ptr_, datums[j].len_); + } + } + } + } else if (vec_fmt == VEC_FIXED) { + ObFixedLengthBase *fixed_data = static_cast(ivec); + ObBitmapNullVectorBase *nulls = static_cast(ivec); + int32_t len = fixed_data->get_length(); + for (int j = 0; j < brs_.size_; j++) { + if (!brs_.skip_->at(j) && !nulls->is_null(j)) { + SANITY_CHECK_RANGE(fixed_data->get_data() + j * len, len); + } + } + } else if (vec_fmt == VEC_DISCRETE) { + ObDiscreteBase *dis_data = static_cast(ivec); + ObBitmapNullVectorBase *nulls = static_cast(ivec); + char **ptrs = dis_data->get_ptrs(); + ObLength *lens = dis_data->get_lens(); + for (int j = 0; j < brs_.size_; j++) { + if (!brs_.skip_->at(j) && !nulls->is_null(j)) { + SANITY_CHECK_RANGE(ptrs[j], lens[j]); + } + } + } else if (vec_fmt == VEC_CONTINUOUS) { + ObContinuousBase *cont_base = static_cast(ivec); + ObBitmapNullVectorBase *nulls = static_cast(ivec); + uint32_t *offsets = cont_base->get_offsets(); + char *data = cont_base->get_data(); + for (int j = 0; j < brs_.size_; j++) { + if (!brs_.skip_->at(j) && !nulls->is_null(j)) { + SANITY_CHECK_RANGE(data + offsets[j], offsets[j + 1] - offsets[j]); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected format", K(ret), K(vec_fmt)); + } + + return ret; +} + int ObOperator::output_expr_sanity_check_batch() { int ret = OB_SUCCESS; @@ -627,57 +695,12 @@ int ObOperator::output_expr_sanity_check_batch() LOG_WARN("error unexpected, expr is nullptr", K(ret)); } else if (OB_FAIL(expr->eval_vector(eval_ctx_, brs_))) { LOG_WARN("eval vector failed", K(ret)); - } else { - VectorFormat vec_fmt = expr->get_format(eval_ctx_); - ObIVector *ivec = expr->get_vector(eval_ctx_); - if (vec_fmt == VEC_UNIFORM || vec_fmt == VEC_UNIFORM_CONST) { - ObUniformBase *uni_data = static_cast(ivec); - if (vec_fmt == VEC_UNIFORM_CONST) { - if (brs_.skip_->accumulate_bit_cnt(brs_.size_) < 
brs_.size_) { - ObDatum &datum = uni_data->get_datums()[0]; - SANITY_CHECK_RANGE(datum.ptr_, datum.len_); - } - } else { - ObDatum *datums = uni_data->get_datums(); - for (int j = 0; j < brs_.size_; j++) { - if (!brs_.skip_->at(j)) { - SANITY_CHECK_RANGE(datums[j].ptr_, datums[j].len_); - } - } - } - } else if (vec_fmt == VEC_FIXED) { - ObFixedLengthBase *fixed_data = static_cast(ivec); - ObBitmapNullVectorBase *nulls = static_cast(ivec); - int32_t len = fixed_data->get_length(); - for (int j = 0; j < brs_.size_; j++) { - if (!brs_.skip_->at(j) && !nulls->is_null(j)) { - SANITY_CHECK_RANGE(fixed_data->get_data() + j * len, len); - } - } - } else if (vec_fmt == VEC_DISCRETE) { - ObDiscreteBase *dis_data = static_cast(ivec); - ObBitmapNullVectorBase *nulls = static_cast(ivec); - char **ptrs = dis_data->get_ptrs(); - ObLength *lens = dis_data->get_lens(); - for (int j = 0; j < brs_.size_; j++) { - if (!brs_.skip_->at(j) && !nulls->is_null(j)) { - SANITY_CHECK_RANGE(ptrs[j], lens[j]); - } - } - } else if (vec_fmt == VEC_CONTINUOUS) { - ObContinuousBase *cont_base = static_cast(ivec); - ObBitmapNullVectorBase *nulls = static_cast(ivec); - uint32_t *offsets = cont_base->get_offsets(); - char *data = cont_base->get_data(); - for (int j = 0; j < brs_.size_; j++) { - if (!brs_.skip_->at(j) && !nulls->is_null(j)) { - SANITY_CHECK_RANGE(data + offsets[j], offsets[j + 1] - offsets[j]); - } - } - } else { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected format", K(ret), K(vec_fmt)); + } else if (expr->is_nested_expr() && !is_uniform_format(expr->get_format(eval_ctx_))) { + if (OB_FAIL(output_nested_expr_sanity_check_batch(*expr))) { + LOG_WARN("check nested expr sanity failed", K(ret)); } + } else if (OB_FAIL(output_expr_sanity_check_batch_inner(*expr))) { + LOG_WARN("expr sanity check batch failed", K(ret)); } } return ret; @@ -1463,7 +1486,7 @@ int ObOperator::convert_vector_format() // new operator -> old operator FOREACH_CNT_X(e, spec_.output_, OB_SUCC(ret)) { 
LOG_TRACE("cast to uniform", K(*e)); - if (OB_FAIL((*e)->cast_to_uniform(brs_.size_, eval_ctx_))) { + if (OB_FAIL((*e)->cast_to_uniform(brs_.size_, eval_ctx_, brs_.skip_))) { LOG_WARN("expr evaluate failed", K(ret), KPC(*e), K_(eval_ctx)); } } diff --git a/src/sql/engine/ob_operator.h b/src/sql/engine/ob_operator.h index b66227d9d1..33177965af 100644 --- a/src/sql/engine/ob_operator.h +++ b/src/sql/engine/ob_operator.h @@ -570,6 +570,8 @@ private: int check_stack_once(); int output_expr_sanity_check(); int output_expr_sanity_check_batch(); + int output_expr_sanity_check_batch_inner(const ObExpr &expr); + int output_nested_expr_sanity_check_batch(const ObExpr &expr); int output_expr_decint_datum_len_check(); int output_expr_decint_datum_len_check_batch(); int setup_op_feedback_info(); diff --git a/src/sql/engine/ob_physical_plan_ctx.cpp b/src/sql/engine/ob_physical_plan_ctx.cpp index 034a113e06..a9484d341d 100644 --- a/src/sql/engine/ob_physical_plan_ctx.cpp +++ b/src/sql/engine/ob_physical_plan_ctx.cpp @@ -1079,6 +1079,42 @@ int ObPhysicalPlanCtx::get_sqludt_meta_by_subschema_id(uint16_t subschema_id, Ob return ret; } +int ObPhysicalPlanCtx::get_sqludt_meta_by_subschema_id(uint16_t subschema_id, ObSubSchemaValue &sub_meta) +{ + int ret = OB_SUCCESS; + bool is_subschema_inited_in_plan = true; + if (subschema_id == ObMaxSystemUDTSqlType || subschema_id >= UINT_MAX16) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid subschema id", K(ret), K(subschema_id)); + } else if (OB_NOT_NULL(phy_plan_)) { // physical plan exist, use subschema ctx on phy plan + if (!phy_plan_->get_subschema_ctx().is_inited()) { + LOG_INFO("plan with empty subschema mapping", K(lbt()), K(phy_plan_->get_subschema_ctx())); + is_subschema_inited_in_plan = false; + } else if (OB_FAIL(phy_plan_->get_subschema_ctx().get_subschema(subschema_id, sub_meta))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get subschema by subschema id", K(ret), K(subschema_id)); + } else { + LOG_WARN("subschema 
not exist in subschema mapping", K(ret), K(subschema_id)); + } + } + } + if (OB_SUCC(ret) + && (OB_ISNULL(phy_plan_) || !is_subschema_inited_in_plan)) { + if (!subschema_ctx_.is_inited()) { // no phy plan + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid subschema id", K(ret), K(subschema_id), K(lbt())); + } else { + if (OB_FAIL(subschema_ctx_.get_subschema(subschema_id, sub_meta))) { + LOG_WARN("failed to get subschema", K(ret), K(subschema_id)); + } else if (sub_meta.type_ >= OB_SUBSCHEMA_MAX_TYPE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid subschema type", K(ret), K(sub_meta)); + } + } + } + return ret; +} + int ObPhysicalPlanCtx::get_subschema_id_by_udt_id(uint64_t udt_type_id, uint16_t &subschema_id, share::schema::ObSchemaGetterGuard *schema_guard) @@ -1154,6 +1190,53 @@ int ObPhysicalPlanCtx::get_subschema_id_by_udt_id(uint64_t udt_type_id, return ret; } +int ObPhysicalPlanCtx::get_subschema_id_by_collection_elem_type(ObNestedType coll_type, + const ObDataType &elem_type, + uint16_t &subschema_id) +{ + int ret = OB_SUCCESS; + if (OB_NOT_NULL(phy_plan_)) { // physical plan exist, use subschema ctx on phy plan + if (!phy_plan_->get_subschema_ctx().is_inited()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("plan with empty subschema mapping", K(ret), K(phy_plan_->get_subschema_ctx())); + } else if (OB_FAIL(phy_plan_->get_subschema_ctx().get_subschema_id_by_typedef(coll_type, + elem_type, + subschema_id))) { + LOG_WARN("failed to get subschema id", K(ret), K(elem_type)); + } + // no phy plan + } else if (!subschema_ctx_.is_inited() && OB_FAIL(subschema_ctx_.init())) { + LOG_WARN("subschema ctx init failed", K(ret)); + } else if (OB_FAIL(subschema_ctx_.get_subschema_id_by_typedef(coll_type, elem_type, subschema_id))) { + LOG_WARN("failed to get subschema id", K(ret), K(elem_type)); + } + return ret; +} + +int ObPhysicalPlanCtx::get_subschema_id_by_type_string(const ObString &type_string, uint16_t &subschema_id) +{ + int ret = OB_SUCCESS; + bool 
is_subschema_inited_in_plan = true; + if (OB_NOT_NULL(phy_plan_)) { // physical plan exist, use subschema ctx on phy plan + if (!phy_plan_->get_subschema_ctx().is_inited()) { + LOG_INFO("plan with empty subschema mapping", K(lbt()), K(phy_plan_->get_subschema_ctx())); + is_subschema_inited_in_plan = false; + } else if (OB_FAIL(phy_plan_->get_subschema_ctx().get_subschema_id_by_typedef(type_string, subschema_id))) { + LOG_WARN("failed to get subschema id", K(ret)); + } + // no phy plan + } + if (OB_SUCC(ret) + && (OB_ISNULL(phy_plan_) || !is_subschema_inited_in_plan)) { + if (!subschema_ctx_.is_inited() && OB_FAIL(subschema_ctx_.init())) { + LOG_WARN("subschema ctx init failed", K(ret)); + } else if (OB_FAIL(subschema_ctx_.get_subschema_id_by_typedef(type_string, subschema_id))) { + LOG_WARN("failed to get subschema id", K(ret)); + } + } + return ret; +} + int ObPhysicalPlanCtx::set_all_local_session_vars(ObIArray &all_local_session_vars) { int ret = OB_SUCCESS; diff --git a/src/sql/engine/ob_physical_plan_ctx.h b/src/sql/engine/ob_physical_plan_ctx.h index 4e68c18079..cf9898ac01 100644 --- a/src/sql/engine/ob_physical_plan_ctx.h +++ b/src/sql/engine/ob_physical_plan_ctx.h @@ -506,10 +506,15 @@ public: bool is_rich_format() const { return enable_rich_format_; } int get_sqludt_meta_by_subschema_id(uint16_t subschema_id, ObSqlUDTMeta &udt_meta); + int get_sqludt_meta_by_subschema_id(uint16_t subschema_id, ObSubSchemaValue &sub_meta); bool is_subschema_ctx_inited(); int get_subschema_id_by_udt_id(uint64_t udt_type_id, uint16_t &subschema_id, share::schema::ObSchemaGetterGuard *schema_guard = NULL); + int get_subschema_id_by_collection_elem_type(ObNestedType coll_type, + const ObDataType &elem_type, + uint16_t &subschema_id); + int get_subschema_id_by_type_string(const ObString &type_string, uint16_t &subschema_id); int build_subschema_by_fields(const ColumnsFieldIArray *fields, share::schema::ObSchemaGetterGuard *schema_guard); int 
build_subschema_ctx_by_param_store(share::schema::ObSchemaGetterGuard *schema_guard); diff --git a/src/sql/engine/ob_serializable_function.h b/src/sql/engine/ob_serializable_function.h index 26d8ecbb1d..529ed67fe1 100644 --- a/src/sql/engine/ob_serializable_function.h +++ b/src/sql/engine/ob_serializable_function.h @@ -127,6 +127,12 @@ typedef void (*ser_eval_vector_function)(ObEvalVectorFuncTag &); OB_SFA_COLLECTION_EXPR_EVAL, \ OB_SFA_COLLECTION_EXPR_EVAL_BATCH, \ OB_SFA_COLLECTION_EXPR_EVAL_VEC, \ + OB_SFA_RELATION_EXPR_COLLECTION_EVAL, \ + OB_SFA_RELATION_EXPR_COLLECTION_EVAL_BATCH, \ + OB_SFA_DATUM_CMP_COLLECTION, \ + OB_SFA_DATUM_NULLSAFE_COLLECTION_CMP, \ + OB_SFA_EXPR_COLLECTION_BASIC_PART1, \ + OB_SFA_EXPR_COLLECTION_BASIC_PART2, \ OB_SFA_MAX enum ObSerFuncArrayID { diff --git a/src/sql/engine/ob_subschema_ctx.cpp b/src/sql/engine/ob_subschema_ctx.cpp index 303a258152..97243bd6e8 100644 --- a/src/sql/engine/ob_subschema_ctx.cpp +++ b/src/sql/engine/ob_subschema_ctx.cpp @@ -13,6 +13,7 @@ #define USING_LOG_PREFIX SQL_ENG #include "sql/engine/ob_subschema_ctx.h" #include "deps/oblib/src/lib/udt/ob_udt_type.h" +#include "deps/oblib/src/lib/udt/ob_array_type.h" #include "src/share/rc/ob_tenant_base.h" namespace oceanbase @@ -119,9 +120,94 @@ int subschema_value_deep_copy(const void *src_value, void return ret; } +template<> +int subschema_value_serialize(void *value, char* buf, const int64_t buf_len, int64_t& pos) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(value)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null sql udt value for seriazlie", K(ret), K(OB_SUBSCHEMA_COLLECTION_TYPE)); + } else { + const ObSqlCollectionInfo* coll_meta = reinterpret_cast(value); + if (OB_FAIL(coll_meta->serialize(buf, buf_len, pos))) { + LOG_WARN("failed to do sql udt meta seriazlie", K(ret), K(*coll_meta)); + } + } + + return ret; +} + +template <> +int subschema_value_deserialize(void *value, const char* buf, const int64_t data_len, int64_t& pos) +{ + int ret = OB_SUCCESS; + if 
(OB_ISNULL(value)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null sql udt value for deseriazlie", K(ret), K(OB_SUBSCHEMA_COLLECTION_TYPE)); + } else { + ObSqlCollectionInfo* coll_meta = reinterpret_cast(value); + if (OB_FAIL(coll_meta->deserialize(buf, data_len, pos))) { + LOG_WARN("failed to do sql udt meta deseriazlie", K(ret), KP(buf), K(data_len)); + } + } + return ret; +} + +template <> +int64_t subschema_value_serialize_size(void *value) +{ + int ret = OB_SUCCESS; + int64_t len = 0; + if (OB_ISNULL(value)) { + } else { + ObSqlCollectionInfo* coll_meta = reinterpret_cast(value); + len += coll_meta->get_serialize_size(); + } + return len; +} + +template <> +int subschema_value_get_signature(void *value, uint64_t &signature) +{ + int ret = OB_SUCCESS; + signature = 0; + if (OB_ISNULL(value)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null subschema value", K(ret), K(OB_SUBSCHEMA_COLLECTION_TYPE)); + } else { + const ObSqlCollectionInfo* coll_meta = reinterpret_cast(value); + signature = coll_meta->get_def_string().hash(); + } + return ret; +} + +template <> +int subschema_value_deep_copy(const void *src_value, void *&dst_value, ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(src_value)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null subschema value for deep copy", K(ret), K(OB_SUBSCHEMA_COLLECTION_TYPE)); + } else { + const ObSqlCollectionInfo* coll_meta = reinterpret_cast(src_value); + ObSqlCollectionInfo* copy_meta = NULL; + if (OB_FAIL(coll_meta->deep_copy(allocator, copy_meta))) { + LOG_WARN("failed to deep copy udt meta", K(ret), K(OB_SUBSCHEMA_COLLECTION_TYPE)); + } else if (OB_ISNULL(copy_meta)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("deep copy udt meta result is null", K(ret), K(OB_SUBSCHEMA_COLLECTION_TYPE)); + } else { + dst_value = static_cast(copy_meta); + } + } + return ret; +} + ObSubSchemaFuncs SUBSCHEMA_FUNCS[OB_SUBSCHEMA_MAX_TYPE] = { DEF_SUBSCHEMA_ENTRY(OB_SUBSCHEMA_UDT_TYPE), + DEF_SUBSCHEMA_ENTRY(OB_SUBSCHEMA_ENUM_SET_TYPE), 
+ DEF_SUBSCHEMA_ENTRY(OB_SUBSCHEMA_COLLECTION_TYPE), }; int ObSubSchemaValue::deep_copy_value(const void *src_value, ObIAllocator &allocator) @@ -159,6 +245,27 @@ OB_DEF_DESERIALIZE(ObSubSchemaValue) signature_); if (OB_FAIL(ret)) { LOG_WARN("fail to deserialize subschema type info", K(ret), K(type_), K(signature_)); + } else if (OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is null", K(ret), K(type_), K(signature_)); + } else if (type_ == ObSubSchemaType::OB_SUBSCHEMA_UDT_TYPE) { + ObSqlUDTMeta *udt_meta = OB_NEWx(ObSqlUDTMeta, allocator_); + if (OB_ISNULL(udt_meta)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc udt meta memory failed", K(ret)); + } else { + value_ = udt_meta; + } + } else if (type_ == ObSubSchemaType::OB_SUBSCHEMA_COLLECTION_TYPE) { + ObSqlCollectionInfo *coll_meta = OB_NEWx(ObSqlCollectionInfo, allocator_, *allocator_); + if (OB_ISNULL(coll_meta)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc collection info memory failed", K(ret)); + } else { + value_ = coll_meta; + } + } + if (OB_FAIL(ret)) { } else if (OB_FAIL(SUBSCHEMA_FUNCS[type_].value_deserialize(value_, buf, data_len, pos))) { LOG_WARN("fail to deserialize subschema data", K(ret), K(type_), K(signature_)); } @@ -216,13 +323,10 @@ OB_DEF_DESERIALIZE(ObSubSchemaCtx) for (int64_t i = 0; OB_SUCC(ret) && i < subschema_count; i++) { uint64_t subschema_id = 0; ObSubSchemaValue value; - ObSqlUDTMeta udt_meta; - value.value_ = &udt_meta; + value.allocator_ = &allocator_; OB_UNIS_DECODE(subschema_id); OB_UNIS_DECODE(value); - if (OB_FAIL(ret)) { // copy value from buffer to local memory - } else if (OB_FAIL(value.deep_copy_value(value.value_, allocator_))) { - LOG_WARN("deep copy value failed", K(ret), K(subschema_id), K(value)); + if (OB_FAIL(ret)) { } else if (OB_FAIL(set_subschema(subschema_id, value))) { LOG_WARN("fail to set subschema", K(ret), K(subschema_id), K(value)); } @@ -286,19 +390,25 @@ int ObSubSchemaCtx::init() if (is_inited_) { ret = 
OB_ERR_UNEXPECTED; LOG_WARN("sub schema ctx already inited", K(ret), K(*this)); - } else if (OB_FAIL(subschema_map_.create(SUBSCHEMA_BUCKET_NUM, - "SubSchemaHash", - "SubSchemaHash", - MTL_ID()))) { - LOG_WARN("fail to create subschema map", K(ret)); - } else if (OB_FAIL(subschema_reverse_map_.create(SUBSCHEMA_BUCKET_NUM, - "SubSchemaRev", - "SubSchemaRev", - MTL_ID()))) { - LOG_WARN("fail to create subschema map", K(ret)); } else { - is_inited_ = true; - used_subschema_id_ = MAX_NON_RESERVED_SUBSCHEMA_ID; + uint64_t tenant_id = MTL_ID(); + if (tenant_id == OB_INVALID_TENANT_ID) { + tenant_id = OB_SERVER_TENANT_ID; + } + if (OB_FAIL(subschema_map_.create(SUBSCHEMA_BUCKET_NUM, + "SubSchemaHash", + "SubSchemaHash", + tenant_id))) { + LOG_WARN("fail to create subschema map", K(ret)); + } else if (OB_FAIL(subschema_reverse_map_.create(SUBSCHEMA_BUCKET_NUM, + "SubSchemaRev", + "SubSchemaRev", + tenant_id))) { + LOG_WARN("fail to create subschema map", K(ret)); + } else { + is_inited_ = true; + used_subschema_id_ = MAX_NON_RESERVED_SUBSCHEMA_ID; + } } return ret; } @@ -361,12 +471,17 @@ int ObSubSchemaCtx::set_subschema(uint16_t subschema_id, ObSubSchemaValue &value int ret = OB_SUCCESS; uint64_t key = subschema_id; ObSubSchemaValue tmp_value; + ObSubSchemaReverseKey rev_key(value.type_, value.signature_); if (OB_FAIL(subschema_map_.get_refactored(key, tmp_value))) { if (OB_HASH_NOT_EXIST != ret) { LOG_WARN("failed to get subschema", K(ret), K(key), K(tmp_value), K(value)); - } else { // not exist + } else if (value.type_ == ObSubSchemaType::OB_SUBSCHEMA_COLLECTION_TYPE) { + ObSqlCollectionInfo *meta_info = static_cast(value.value_); + rev_key.str_signature_ = meta_info->get_def_string(); + } + if (OB_HASH_NOT_EXIST == ret) { + // not exist ret = OB_SUCCESS; - ObSubSchemaReverseKey rev_key(value.type_, value.signature_); LOG_INFO("add new subschema", K(ret), K(subschema_id), K(value)); if (OB_FAIL(subschema_map_.set_refactored(key, value))) { LOG_WARN("set subschema map 
failed", K(ret), K(subschema_id)); @@ -402,5 +517,111 @@ int ObSubSchemaCtx::get_subschema_id(uint64_t value_signature, return ret; } +int ObSubSchemaCtx::get_subschema_id_by_typedef(const ObString &type_def, + uint16_t &subschema_id) +{ + int ret = OB_SUCCESS; + ObSubSchemaReverseKey rev_key(OB_SUBSCHEMA_COLLECTION_TYPE, type_def); + uint64_t tmp_subid = ObMaxSystemUDTSqlType; + if (OB_FAIL(subschema_reverse_map_.get_refactored(rev_key, tmp_subid))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get subschemaid from reverse map", K(ret)); + } else { + ObSqlCollectionInfo *buf = NULL; + uint16_t new_tmp_id; + char *name_def = NULL; + // construnct collection meta + if (OB_ISNULL(buf = OB_NEWx(ObSqlCollectionInfo, &allocator_, allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to create ObSqlCollectionInfo buffer", K(ret)); + } else if (OB_FAIL(get_new_subschema_id(new_tmp_id))) { + LOG_WARN("fail to get new subschema id", K(ret)); + } else if (OB_ISNULL(name_def = static_cast(allocator_.alloc(type_def.length())))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to create ObSqlCollectionInfo buffer", K(ret)); + } else { + tmp_subid = new_tmp_id; + ObString type_info; + type_info.assign_buffer(name_def, type_def.length()); + type_info.write(type_def.ptr(), type_def.length()); + buf->set_name(type_info); + if (OB_FAIL(buf->parse_type_info())) { + LOG_WARN("fail to parse ObSqlCollectionInfo", K(ret)); + } else { + ObSubSchemaValue value; + value.type_ = OB_SUBSCHEMA_COLLECTION_TYPE; + value.value_ = static_cast(buf); + uint64_t key = tmp_subid; + rev_key.str_signature_.assign_ptr(type_info.ptr(), type_info.length()); + if (OB_FAIL(subschema_map_.set_refactored(key, value))) { + LOG_WARN("set subschema map failed", K(ret), K(key)); + } else if (OB_FAIL(subschema_reverse_map_.set_refactored(rev_key, key))) { + LOG_WARN("set subschema map failed", K(ret), K(rev_key)); + int tmp_ret = subschema_map_.erase_refactored(key); + if (tmp_ret != 
OB_SUCCESS) { + LOG_WARN("erase subschema map failed", K(ret), K(tmp_ret), K(key)); + } + } + } + } + } + } + if (OB_SUCC(ret)) { + subschema_id = tmp_subid; + } + return ret; +} + +int ObSubSchemaCtx::get_subschema_id_by_typedef(const ObString &type_def, + uint16_t &subschema_id) const +{ + int ret = OB_SUCCESS; + ObSubSchemaReverseKey rev_key(OB_SUBSCHEMA_COLLECTION_TYPE, type_def); + uint64_t tmp_subid = ObMaxSystemUDTSqlType; + if (OB_FAIL(subschema_reverse_map_.get_refactored(rev_key, tmp_subid))) { + LOG_WARN("failed to get subschemaid from reverse map", K(ret)); + } else { + subschema_id = tmp_subid; + } + return ret; +} + +int ObSubSchemaCtx::get_subschema_id_by_typedef(ObNestedType coll_type, + const ObDataType &elem_type, + uint16_t &subschema_id) +{ + int ret = OB_SUCCESS; + const int MAX_LEN = 256; + char tmp[MAX_LEN] = {0}; + if (OB_FAIL(ObArrayUtil::get_type_name(elem_type, tmp, MAX_LEN))) { + LOG_WARN("failed to convert len to string", K(ret)); + } else { + ObString tmp_def(strlen(tmp), tmp); + if (OB_FAIL(get_subschema_id_by_typedef(tmp_def, subschema_id))) { + LOG_WARN("failed to get subschemaid by typedef", K(ret)); + } + } + return ret; +} + +int ObSubSchemaCtx::get_subschema_id_by_typedef(ObNestedType coll_type, + const ObDataType &elem_type, + uint16_t &subschema_id) const +{ + int ret = OB_SUCCESS; + const int MAX_LEN = 256; + int64_t pos = 0; + char tmp[MAX_LEN] = {0}; + if (OB_FAIL(ObArrayUtil::get_type_name(elem_type, tmp, MAX_LEN))) { + LOG_WARN("failed to convert len to string", K(ret)); + } else { + ObString tmp_def(strlen(tmp), tmp); + if (OB_FAIL(get_subschema_id_by_typedef(tmp_def, subschema_id))) { + LOG_WARN("failed to get subschemaid by typedef", K(ret)); + } + } + return ret; +} + } //sql } //oceanbase diff --git a/src/sql/engine/ob_subschema_ctx.h b/src/sql/engine/ob_subschema_ctx.h index b21a2cfe5f..dbd1da6d4f 100644 --- a/src/sql/engine/ob_subschema_ctx.h +++ b/src/sql/engine/ob_subschema_ctx.h @@ -14,6 +14,7 @@ #define 
OCEANBASE_SQL_OB_SUBSCHEMA_CTX_H #include "lib/oblog/ob_log_module.h" +#include "lib/udt/ob_collection_type.h" #include "common/ob_field.h" namespace oceanbase @@ -25,6 +26,7 @@ namespace sql enum ObSubSchemaType { OB_SUBSCHEMA_UDT_TYPE = 0, OB_SUBSCHEMA_ENUM_SET_TYPE = 1, + OB_SUBSCHEMA_COLLECTION_TYPE = 2, OB_SUBSCHEMA_MAX_TYPE }; @@ -43,6 +45,7 @@ public: ObSubSchemaType type_; uint64_t signature_; void *value_; + common::ObIAllocator *allocator_; // for deserialize }; typedef int (*ob_subschema_value_serialize)(void *value, char* buf, const int64_t buf_len, int64_t& pos); @@ -64,25 +67,42 @@ struct ObSubSchemaFuncs }; template - int subschema_value_serialize(void *value, char* buf, const int64_t buf_len, int64_t& pos); +int subschema_value_serialize(void *value, char* buf, const int64_t buf_len, int64_t& pos) +{ + return OB_NOT_SUPPORTED; +} template - int subschema_value_deserialize(void *value, const char* buf, const int64_t data_len, int64_t& pos); -template int64_t subschema_value_serialize_size(void *value); +int subschema_value_deserialize(void *value, const char* buf, const int64_t data_len, int64_t& pos) +{ + return OB_NOT_SUPPORTED; +} +template int64_t subschema_value_serialize_size(void *value) +{ + return OB_NOT_SUPPORTED; +} template - int subschema_value_get_signature(void *value, uint64_t &signature); +int subschema_value_get_signature(void *value, uint64_t &signature) +{ + return OB_NOT_SUPPORTED; +} template - int subschema_value_deep_copy(const void *src_value, void *&dst_value, ObIAllocator &allocator); +int subschema_value_deep_copy(const void *src_value, void *&dst_value, ObIAllocator &allocator) +{ + return OB_NOT_SUPPORTED; +} class ObSubSchemaReverseKey { public: - ObSubSchemaReverseKey() : type_(OB_SUBSCHEMA_MAX_TYPE), signature_(0) {} - ObSubSchemaReverseKey(ObSubSchemaType type, uint64_t signature) : type_(type), signature_(signature) {} + ObSubSchemaReverseKey() : type_(OB_SUBSCHEMA_MAX_TYPE), signature_(0), str_signature_() {} + 
ObSubSchemaReverseKey(ObSubSchemaType type, uint64_t signature) : type_(type), signature_(signature), str_signature_() {} + ObSubSchemaReverseKey(ObSubSchemaType type, ObString type_info) : type_(type), signature_(0), str_signature_(type_info) {} ~ObSubSchemaReverseKey() {} uint64_t hash() const { - return signature_ + static_cast(type_); + return type_ == OB_SUBSCHEMA_UDT_TYPE ? signature_ + static_cast(type_) + : murmurhash(str_signature_.ptr(), static_cast(str_signature_.length()), 0) + static_cast(type_); } int hash(uint64_t &res) const @@ -94,12 +114,14 @@ class ObSubSchemaReverseKey bool operator==(const ObSubSchemaReverseKey &other) const { return (other.type_ == this->type_ - && other.signature_ == this->signature_); + && other.signature_ == this->signature_ + && other.str_signature_ == this->str_signature_); } TO_STRING_KV(K_(type), K_(signature)); ObSubSchemaType type_; uint64_t signature_; + ObString str_signature_; }; class ObSubSchemaCtx @@ -135,7 +157,10 @@ public: const ObSubSchemaMap &get_subschema_map() const { return subschema_map_; } int get_subschema_id(uint64_t value_signature, ObSubSchemaType type, uint16_t &subschema_id) const; - + int get_subschema_id_by_typedef(ObNestedType coll_type, const ObDataType &elem_type, uint16_t &subschema_id); + int get_subschema_id_by_typedef(ObNestedType coll_type, const ObDataType &elem_type, uint16_t &subschema_id) const; + int get_subschema_id_by_typedef(const ObString &type_def, uint16_t &subschema_id); + int get_subschema_id_by_typedef(const ObString &type_def, uint16_t &subschema_id) const; void set_fields(const common::ObIArray *fields) { fields_ = fields; } ObIAllocator &get_allocator() { return allocator_; } diff --git a/src/sql/engine/pdml/static/ob_px_sstable_insert_op.cpp b/src/sql/engine/pdml/static/ob_px_sstable_insert_op.cpp index dc3491f7a3..cf72bf6993 100644 --- a/src/sql/engine/pdml/static/ob_px_sstable_insert_op.cpp +++ b/src/sql/engine/pdml/static/ob_px_sstable_insert_op.cpp @@ -164,6 +164,8 
@@ int ObPxMultiPartSSTableInsertOp::inner_get_next_row() } else if (OB_ISNULL(table_schema)) { ret = OB_TABLE_NOT_EXIST; LOG_WARN("Table not exist", K(MY_SPEC.plan_->get_ddl_table_id()), K(ret)); + } else if (OB_FALSE_IT(is_vec_gen_vid_ = table_schema->is_vec_rowkey_vid_type())) { + } else if (OB_FALSE_IT(is_vec_data_complement_ = table_schema->is_vec_index_snapshot_data_type())) { } else if (need_count_rows() && OB_FAIL(get_all_rows_and_count())) { LOG_WARN("fail to get all rows and count", K(ret)); } else { @@ -175,6 +177,8 @@ int ObPxMultiPartSSTableInsertOp::inner_get_next_row() if (OB_ISNULL(ctx_.get_physical_plan_ctx()) || OB_ISNULL(phy_plan = ctx_.get_physical_plan_ctx()->get_phy_plan())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get phy_plan failed", K(ret), KP(ctx_.get_physical_plan_ctx()), KP(phy_plan)); + } else if (table_schema->is_vec_delta_buffer_type() || table_schema->is_vec_index_id_type()) { + all_slices_empty = true; } else if (OB_FAIL(get_next_row_with_cache())) {// get one row first for calc part_id if (OB_UNLIKELY(OB_ITER_END != ret)) { LOG_WARN("fail get next row from child", K(ret)); @@ -227,7 +231,8 @@ int ObPxMultiPartSSTableInsertOp::inner_get_next_row() notify_tablet_id, table_schema->get_rowkey_column_num(), snapshot_version_, - slice_info.context_id_, parallel_idx))) { + slice_info.context_id_, parallel_idx, + is_vec_data_complement_))) { LOG_WARN("init failed", K(ret)); } else if (OB_FAIL(tenant_direct_load_mgr->fill_sstable_slice(slice_info, &row_iter, @@ -299,7 +304,11 @@ int ObPxMultiPartSSTableInsertOp::get_next_row_with_cache() break; } } - if (OB_ISNULL(auto_inc_expr)) { + if (OB_SUCC(ret) && is_vec_gen_vid_ && child_expr.count() > 0) { + auto_inc_expr = child_expr.at(child_expr.count() - 1); + } + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(auto_inc_expr)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("cannot find tablet autoinc expr", K(child_->get_spec().output_)); } else if (curr_tablet_idx_ < 0 || curr_tablet_idx_ >= 
tablet_seq_caches_.count()) { @@ -316,7 +325,13 @@ int ObPxMultiPartSSTableInsertOp::get_next_row_with_cache() if (OB_SUCC(ret)) { ObDatum &datum = auto_inc_expr->locate_datum_for_write(eval_ctx_); - datum.set_uint(next_autoinc_val); + ObTabletID tablet_id = tablet_seq_caches_.at(curr_tablet_idx_).tablet_id_; + if (is_vec_gen_vid_) { + // TODO @lhd make vid into struct + datum.set_uint(next_autoinc_val); + } else { + datum.set_uint(next_autoinc_val); + } auto_inc_expr->set_evaluated_projected(eval_ctx_); } } diff --git a/src/sql/engine/pdml/static/ob_px_sstable_insert_op.h b/src/sql/engine/pdml/static/ob_px_sstable_insert_op.h index 0c430f840a..8f1fd7ab82 100644 --- a/src/sql/engine/pdml/static/ob_px_sstable_insert_op.h +++ b/src/sql/engine/pdml/static/ob_px_sstable_insert_op.h @@ -73,7 +73,9 @@ public: count_rows_finish_(false), is_all_partition_finished_(false), curr_part_idx_(0), - snapshot_version_(0) + snapshot_version_(0), + is_vec_data_complement_(false), + is_vec_gen_vid_(false) {} virtual ~ObPxMultiPartSSTableInsertOp() { destroy(); } const ObPxMultiPartSSTableInsertSpec &get_spec() const; @@ -101,7 +103,7 @@ private: private: int get_all_rows_and_count(); int create_tablet_store(common::ObTabletID &tablet_id, ObChunkDatumStore *&tablet_store); - bool need_count_rows() const { return MY_SPEC.regenerate_heap_table_pk_ && !count_rows_finish_; } + bool need_count_rows() const { return (MY_SPEC.regenerate_heap_table_pk_ || is_vec_gen_vid_) && !count_rows_finish_; } int get_next_tablet_id(common::ObTabletID &tablet_id); private: friend class storage::ObSSTableInsertRowIterator; @@ -118,6 +120,9 @@ private: bool is_all_partition_finished_; int64_t curr_part_idx_; int64_t snapshot_version_; // ddl snapshot version. 
+ // vector index + bool is_vec_data_complement_; + bool is_vec_gen_vid_; DISALLOW_COPY_AND_ASSIGN(ObPxMultiPartSSTableInsertOp); }; diff --git a/src/sql/engine/px/exchange/ob_px_dist_transmit_op.cpp b/src/sql/engine/px/exchange/ob_px_dist_transmit_op.cpp index 91719e15df..1b913f92ff 100644 --- a/src/sql/engine/px/exchange/ob_px_dist_transmit_op.cpp +++ b/src/sql/engine/px/exchange/ob_px_dist_transmit_op.cpp @@ -910,7 +910,7 @@ int ObPxDistTransmitOp::add_batch_row_for_piece_msg_vec(ObChunkDatumStore &sampl if (cnt > 0) { ret = inner_get_next_batch(cnt); FOREACH_CNT_X(e, MY_SPEC.sampling_saving_row_, OB_SUCC(ret)) { - if (OB_FAIL((*e)->cast_to_uniform(brs_.size_, eval_ctx_))) { + if (OB_FAIL((*e)->cast_to_uniform(brs_.size_, eval_ctx_, brs_.skip_))) { LOG_WARN("cast expr to uniform failed", K(ret), KPC(*e), K_(eval_ctx)); } } diff --git a/src/sql/engine/px/exchange/ob_px_transmit_op.cpp b/src/sql/engine/px/exchange/ob_px_transmit_op.cpp index 5c19640a43..c6aba8093c 100644 --- a/src/sql/engine/px/exchange/ob_px_transmit_op.cpp +++ b/src/sql/engine/px/exchange/ob_px_transmit_op.cpp @@ -1142,6 +1142,22 @@ void ObPxTransmitOp::fill_batch_ptrs_fixed(const int64_t *indexes) } } +int ObPxTransmitOp::prepare_for_nested_expr() +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < get_spec().output_.count(); ++i) { + ObExpr *expr = get_spec().output_.at(i); + if (expr->is_nested_expr() && !is_uniform_format(expr->get_format(eval_ctx_))) { + if (OB_FAIL(expr->nested_cast_to_uniform(brs_.size_, eval_ctx_, brs_.skip_))) { + LOG_WARN("failed to cast nested expr to uniform", K(ret)); + } else { + params_.vectors_.at(i) = expr->get_vector(eval_ctx_); + } + } + } + return ret; +} + int ObPxTransmitOp::keep_order_send_batch(ObEvalCtx::BatchInfoScopeGuard &batch_info_guard, const int64_t *indexes) { int ret = OB_SUCCESS; @@ -1155,6 +1171,8 @@ int ObPxTransmitOp::keep_order_send_batch(ObEvalCtx::BatchInfoScopeGuard &batch_ } } if (OB_FAIL(ret)) { + } else if 
(OB_FAIL(prepare_for_nested_expr())) { + LOG_WARN("failed to prepare for nested expr", K(ret)); } else if (OB_FAIL(ObTempRowStore::DtlRowBlock::calc_rows_size(params_.vectors_, params_.meta_, brs_, params_.row_size_array_))) { LOG_WARN("failed to calc size", K(ret)); @@ -1791,6 +1809,8 @@ int ObPxTransmitOp::hash_reorder_send_batch(ObEvalCtx::BatchInfoScopeGuard &batc } const ObPxTransmitSpec &spec = static_cast(get_spec()); if (OB_FAIL(ret)) { + } else if (OB_FAIL(prepare_for_nested_expr())) { + LOG_WARN("failed to prepare for nested expr", K(ret)); } else { switch (data_msg_type_) { case dtl::ObDtlMsgType::PX_VECTOR_FIXED: { diff --git a/src/sql/engine/px/exchange/ob_px_transmit_op.h b/src/sql/engine/px/exchange/ob_px_transmit_op.h index 48e0312512..4bca3f2d95 100644 --- a/src/sql/engine/px/exchange/ob_px_transmit_op.h +++ b/src/sql/engine/px/exchange/ob_px_transmit_op.h @@ -195,6 +195,7 @@ private: int try_wait_channel(); void init_data_msg_type(const common::ObIArray &output); void fill_batch_ptrs(const int64_t *indexes); + int prepare_for_nested_expr(); void fill_batch_ptrs_fixed(const int64_t *indexes); dtl::ObDtlMsgType get_data_msg_type() const { return data_msg_type_; } protected: diff --git a/src/sql/engine/px/ob_px_row_store.cpp b/src/sql/engine/px/ob_px_row_store.cpp index 074f9eace5..67d7adee6c 100644 --- a/src/sql/engine/px/ob_px_row_store.cpp +++ b/src/sql/engine/px/ob_px_row_store.cpp @@ -21,6 +21,7 @@ #include "share/vector/ob_continuous_base.h" #include "share/vector/ob_fixed_length_base.h" #include "share/vector/ob_uniform_base.h" +#include "sql/engine/expr/ob_array_expr_utils.h" using namespace oceanbase::common; @@ -539,7 +540,11 @@ int ObReceiveRowReader::attach_vectors(const common::ObIArray &exprs, } else { ObExpr *e = exprs.at(col_idx); ObIVector *vec = e->get_vector(eval_ctx); - if (OB_FAIL(vec->from_rows(meta, srows, read_rows, col_idx))) { + if (e->is_nested_expr() && !is_uniform_format(e->get_format(eval_ctx))) { + if 
(OB_FAIL(ObArrayExprUtils::nested_expr_from_rows(*e, eval_ctx, meta, srows, read_rows, col_idx))) { + LOG_WARN("fail to do nested expr from rows", K(ret)); + } + } else if (OB_FAIL(vec->from_rows(meta, srows, read_rows, col_idx))) { LOG_WARN("failed to fill vector", K(ret)); } e->set_evaluated_projected(eval_ctx); diff --git a/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.cpp b/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.cpp index 81407f9ab7..d1b584d78b 100644 --- a/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.cpp +++ b/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.cpp @@ -1038,7 +1038,11 @@ int ObRFInFilterVecMsg::ObRFInFilterRowStore::create_and_add_row( for (int64_t i = 0; i < exprs.count() && OB_SUCC(ret); ++i) { ObExpr *expr = exprs.at(i); ObIVector *vec = expr->get_vector(ctx); - OZ(vec->to_row(row_meta, row, batch_idx, i)); + if (expr->is_nested_expr() && !is_uniform_format(vec->get_format())) { + OZ(ObCompactRow::nested_vec_to_row(*expr, ctx, row_meta, row, batch_idx, i)); + } else { + OZ(vec->to_row(row_meta, row, batch_idx, i)); + } } if (OB_FAIL(ret)) { } else if (FALSE_IT(row->extra_payload(row_meta) = hash_val)) { diff --git a/src/sql/engine/sort/ob_sort_key_fetcher_vec_op.cpp b/src/sql/engine/sort/ob_sort_key_fetcher_vec_op.cpp index 8c0096d5d1..c25170e9b0 100644 --- a/src/sql/engine/sort/ob_sort_key_fetcher_vec_op.cpp +++ b/src/sql/engine/sort/ob_sort_key_fetcher_vec_op.cpp @@ -71,7 +71,10 @@ int ObSortKeyFetcher::init(const common::ObIArray &sk_exprs, const ObSortFieldCollation &sort_collation = sort_collations.at(i); const ObExpr *e = sk_exprs.at(sort_collation.field_idx_); ObIVector *vec = e->get_vector(eval_ctx); - if (OB_FAIL(sk_vec_ptrs_.push_back(vec))) { + if (e->is_nested_expr()) { + ret = OB_NOT_SUPPORTED; + SQL_ENG_LOG(WARN, "nested expr is not supported", K(ret)); + } else if (OB_FAIL(sk_vec_ptrs_.push_back(vec))) { SQL_ENG_LOG(WARN, "failed to add expr vector", K(ret)); } } diff --git 
a/src/sql/engine/sort/ob_sort_vec_op_impl.h b/src/sql/engine/sort/ob_sort_vec_op_impl.h index d6f23c1e2d..5717598596 100644 --- a/src/sql/engine/sort/ob_sort_vec_op_impl.h +++ b/src/sql/engine/sort/ob_sort_vec_op_impl.h @@ -25,6 +25,7 @@ #include "sql/engine/sort/ob_sort_key_fetcher_vec_op.h" #include "sql/engine/sort/ob_sort_vec_op_eager_filter.h" #include "sql/engine/sort/ob_sort_vec_op_store_row_factory.h" +#include "sql/engine/expr/ob_array_expr_utils.h" #include "observer/omt/ob_tenant_config_mgr.h" #include "sql/engine/sort/ob_pd_topn_sort_filter.h" diff --git a/src/sql/engine/sort/ob_sort_vec_op_impl.ipp b/src/sql/engine/sort/ob_sort_vec_op_impl.ipp index f6cbdf8fd9..774c58283d 100644 --- a/src/sql/engine/sort/ob_sort_vec_op_impl.ipp +++ b/src/sql/engine/sort/ob_sort_vec_op_impl.ipp @@ -522,7 +522,9 @@ int ObSortVecOpImpl::build_row( for (int64_t i = 0; i < exprs.count() && OB_SUCC(ret); ++i) { ObExpr *expr = exprs.at(i); ObIVector *vec = expr->get_vector(ctx); - if (OB_FAIL(vec->to_row(row_meta, stored_row, batch_idx, i))) { + if (expr->is_nested_expr() && !is_uniform_format(vec->get_format())) { + OZ(ObCompactRow::nested_vec_to_row(*expr, ctx, row_meta, stored_row, batch_idx, i)); + } else if (OB_FAIL(vec->to_row(row_meta, stored_row, batch_idx, i))) { SQL_ENG_LOG(WARN, "failed to to row", K(ret), K(expr)); } } @@ -812,8 +814,13 @@ int ObSortVecOpImpl::attach_rows(const ObExprPtrI } else if (OB_FAIL(exprs.at(col_idx)->init_vector_default(ctx, read_rows))) { LOG_WARN("fail to init vector", K(ret)); } else { + ObExpr *e = exprs.at(col_idx); ObIVector *vec = exprs.at(col_idx)->get_vector(ctx); - if (VEC_UNIFORM_CONST != vec->get_format()) { + if (e->is_nested_expr() && !is_uniform_format(vec->get_format())) { + if (OB_FAIL(ObArrayExprUtils::nested_expr_from_rows(*e, ctx, row_meta, srows, read_rows, col_idx))) { + LOG_WARN("fail to do nested expr from rows", K(ret)); + } + } else if (VEC_UNIFORM_CONST != vec->get_format()) { ret = vec->from_rows(row_meta, 
srows, read_rows, col_idx); exprs.at(col_idx)->set_evaluated_projected(ctx); } diff --git a/src/sql/engine/subquery/ob_subplan_scan_op.cpp b/src/sql/engine/subquery/ob_subplan_scan_op.cpp index ac07597c8d..05ee35b921 100644 --- a/src/sql/engine/subquery/ob_subplan_scan_op.cpp +++ b/src/sql/engine/subquery/ob_subplan_scan_op.cpp @@ -142,6 +142,30 @@ int ObSubPlanScanOp::next_batch(const int64_t max_row_cnt) return ret; } +int ObSubPlanScanOp::nested_next_vector(ObExpr &from, ObExpr &to) +{ + int ret = OB_SUCCESS; + if (!from.is_nested_expr() || !to.is_nested_expr()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected expr type", K(ret)); + } else if (from.attrs_cnt_ != to.attrs_cnt_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected expr type", K(ret), K(from.attrs_cnt_), K(to.attrs_cnt_ )); + } + for (uint32_t i = 0; OB_SUCC(ret) && i < from.attrs_cnt_; ++i) { + VectorHeader &from_attr_vec_header = from.attrs_[i]->get_vector_header(eval_ctx_); + VectorHeader &to_attr_vec_header = to.attrs_[i]->get_vector_header(eval_ctx_); + if (is_uniform_format(from_attr_vec_header.format_) + || is_uniform_format(to_attr_vec_header.format_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected format type", K(ret), K(from_attr_vec_header.format_), K(to_attr_vec_header.format_)); + } else { + to_attr_vec_header = from_attr_vec_header; + } + } + return ret; +} + int ObSubPlanScanOp::next_vector(const int64_t max_row_cnt) { int ret = OB_SUCCESS; @@ -181,6 +205,9 @@ int ObSubPlanScanOp::next_vector(const int64_t max_row_cnt) OZ(to->init_vector(eval_ctx_, VEC_UNIFORM, brs_.size_)); } else { to_vec_header = from_vec_header; + if (from->is_nested_expr()) { + OZ(nested_next_vector(*from, *to)); + } } // init eval info if (OB_SUCC(ret)) { diff --git a/src/sql/engine/subquery/ob_subplan_scan_op.h b/src/sql/engine/subquery/ob_subplan_scan_op.h index ac3011790e..e190de0d3a 100644 --- a/src/sql/engine/subquery/ob_subplan_scan_op.h +++ b/src/sql/engine/subquery/ob_subplan_scan_op.h @@ -50,6 
+50,7 @@ private: int init_monitor_info(); int next_vector(const int64_t max_row_cnt); int next_batch(const int64_t max_row_cnt); + int nested_next_vector(ObExpr &from, ObExpr &to); }; } // end namespace sql diff --git a/src/sql/engine/table/ob_odps_table_row_iter.cpp b/src/sql/engine/table/ob_odps_table_row_iter.cpp index 5888515e38..f6110d0b06 100644 --- a/src/sql/engine/table/ob_odps_table_row_iter.cpp +++ b/src/sql/engine/table/ob_odps_table_row_iter.cpp @@ -329,6 +329,7 @@ int ObODPSTableRowIterator::print_type_map_user_info(apsara::odps::sdk::ODPSColu const char* ob_type_cstr = ""; if (OB_NOT_NULL(ob_type_expr)) { ObArrayWrap buf; + ObArray extend_info; int64_t pos = 0; ob_type_cstr = ob_obj_type_str(ob_type_expr->datum_meta_.type_); if (OB_SUCCESS == buf.allocate_array(arena_alloc_, 128)) { // 128 is enough to hold user info str @@ -337,7 +338,8 @@ int ObODPSTableRowIterator::print_type_map_user_info(apsara::odps::sdk::ODPSColu ob_type_expr->max_length_, ob_type_expr->datum_meta_.precision_, ob_type_expr->datum_meta_.scale_, - ob_type_expr->datum_meta_.cs_type_); + ob_type_expr->datum_meta_.cs_type_, + extend_info); if (pos < buf.count()) { buf.at(pos++) = '\0'; ob_type_cstr = buf.get_data(); diff --git a/src/sql/engine/table/ob_orc_table_row_iter.cpp b/src/sql/engine/table/ob_orc_table_row_iter.cpp index c1b4996a68..4a090b0b3b 100644 --- a/src/sql/engine/table/ob_orc_table_row_iter.cpp +++ b/src/sql/engine/table/ob_orc_table_row_iter.cpp @@ -332,11 +332,12 @@ int ObOrcTableRowIterator::next_file() "INVALID ORC TYPE" : row_reader_->getSelectedType().getSubtype(i)->toString(); int64_t pos = 0; ObArrayWrap buf; + ObArray extended_type_info; ObDatumMeta &meta = file_column_exprs_.at(i)->datum_meta_; const char *ob_type = ob_obj_type_str(file_column_exprs_.at(i)->datum_meta_.type_); if (OB_SUCCESS == buf.allocate_array(allocator_, 100)) { ob_sql_type_str(buf.get_data(), buf.count(), pos, meta.type_, - OB_MAX_VARCHAR_LENGTH, meta.precision_, meta.scale_, 
meta.cs_type_); + OB_MAX_VARCHAR_LENGTH, meta.precision_, meta.scale_, meta.cs_type_, extended_type_info); if (pos < buf.count()) { buf.at(pos++) = '\0'; ob_type = buf.get_data(); diff --git a/src/sql/engine/table/ob_parquet_table_row_iter.cpp b/src/sql/engine/table/ob_parquet_table_row_iter.cpp index 4d504d322c..360ac10fe5 100644 --- a/src/sql/engine/table/ob_parquet_table_row_iter.cpp +++ b/src/sql/engine/table/ob_parquet_table_row_iter.cpp @@ -353,8 +353,21 @@ int ObParquetTableRowIterator::next_file() ObDatumMeta &meta = file_column_exprs_.at(i)->datum_meta_; const char *ob_type = ob_obj_type_str(file_column_exprs_.at(i)->datum_meta_.type_); if (OB_SUCCESS == buf.allocate_array(allocator_, 100)) { + ObArray extended_type_info; + if (ob_is_collection_sql_type(meta.type_)) { + int tmp_ret = OB_SUCCESS; + const ObSqlCollectionInfo *coll_info = NULL; + uint16_t subschema_id = file_column_exprs_.at(i)->obj_meta_.get_subschema_id(); + ObSubSchemaValue value; + if (OB_SUCCESS != (tmp_ret = eval_ctx.exec_ctx_.get_sqludt_meta_by_subschema_id(subschema_id, value))) { + LOG_WARN("failed to get subschema ctx", K(tmp_ret)); + } else if (FALSE_IT(coll_info = reinterpret_cast(value.value_))) { + } else if (OB_SUCCESS != (tmp_ret = extended_type_info.push_back(coll_info->get_def_string()))) { + LOG_WARN("failed to push back to array", K(tmp_ret), KPC(coll_info)); + } + } ob_sql_type_str(buf.get_data(), buf.count(), pos, meta.type_, - OB_MAX_VARCHAR_LENGTH, meta.precision_, meta.scale_, meta.cs_type_); + OB_MAX_VARCHAR_LENGTH, meta.precision_, meta.scale_, meta.cs_type_, extended_type_info); if (pos < buf.count()) { buf.at(pos++) = '\0'; ob_type = buf.get_data(); diff --git a/src/sql/engine/table/ob_table_scan_op.cpp b/src/sql/engine/table/ob_table_scan_op.cpp index 83e82f79b8..dd70d1a4bb 100644 --- a/src/sql/engine/table/ob_table_scan_op.cpp +++ b/src/sql/engine/table/ob_table_scan_op.cpp @@ -243,6 +243,32 @@ ObDASScanCtDef *ObTableScanCtDef::get_lookup_ctdef() return 
lookup_ctdef; } +ObDASScanCtDef *ObTableScanCtDef::get_rowkey_vid_ctdef() +{ + ObDASScanCtDef *rowkey_vid_ctdef = nullptr; + const ObDASBaseCtDef *attach_ctdef = attach_spec_.attach_ctdef_; + if (OB_NOT_NULL(attach_ctdef)) { + /** + * The iter tree of das scan with vid: + * + * CASE 1: Partition Scan Tree + * + * DOC_ID_MERGE_ITER + * / \ + * / \ + * DAS_SCAN_ITER(DataTable) DAS_SCAN_ITER(RowkeyVid) + * + * + * + **/ + if (DAS_OP_VID_MERGE == attach_ctdef->op_type_) { + OB_ASSERT(2 == attach_ctdef->children_cnt_ && attach_ctdef->children_ != nullptr); + rowkey_vid_ctdef = static_cast(attach_ctdef->children_[1]); + } + } + return rowkey_vid_ctdef; +} + int ObTableScanCtDef::allocate_dppr_table_loc() { int ret = OB_SUCCESS; diff --git a/src/sql/engine/table/ob_table_scan_op.h b/src/sql/engine/table/ob_table_scan_op.h index 5ea75621ae..757426e54d 100644 --- a/src/sql/engine/table/ob_table_scan_op.h +++ b/src/sql/engine/table/ob_table_scan_op.h @@ -26,6 +26,7 @@ #include "sql/das/ob_das_scan_op.h" #include "sql/das/ob_das_attach_define.h" #include "sql/das/ob_das_ir_define.h" +#include "sql/das/ob_das_vec_define.h" #include "sql/engine/basic/ob_pushdown_filter.h" #include "sql/engine/table/ob_index_lookup_op_impl.h" #include "sql/das/iter/ob_das_iter.h" @@ -167,6 +168,7 @@ public: } int allocate_dppr_table_loc(); ObDASScanCtDef *get_lookup_ctdef(); + ObDASScanCtDef *get_rowkey_vid_ctdef(); TO_STRING_KV(K_(pre_query_range), K_(flashback_item), K_(bnlj_param_idxs), diff --git a/src/sql/executor/ob_execute_result.cpp b/src/sql/executor/ob_execute_result.cpp index 491d86b631..19a5ad7310 100644 --- a/src/sql/executor/ob_execute_result.cpp +++ b/src/sql/executor/ob_execute_result.cpp @@ -16,6 +16,7 @@ #include "lib/ash/ob_active_session_guard.h" #include "sql/engine/ob_operator.h" #include "sql/engine/ob_exec_context.h" +#include "sql/engine/expr/ob_array_expr_utils.h" using namespace oceanbase::common; namespace oceanbase diff --git a/src/sql/ob_sql.cpp b/src/sql/ob_sql.cpp 
index b8fea35ef9..1be130702d 100644 --- a/src/sql/ob_sql.cpp +++ b/src/sql/ob_sql.cpp @@ -832,7 +832,13 @@ int ObSql::fill_select_result_set(ObResultSet &result_set, ObSqlCtx *context, co field.type_.meta_.set_ext(); field.accuracy_.set_accuracy(T_OBJ_SDO_GEOMETRY); } - if (OB_FAIL(result_set.get_exec_context().get_subschema_id_by_udt_id(udt_id, tmp_subschema_id))) { + if (expr->get_result_type().is_collection_sql_type() + && !ObObjUDTUtil::ob_is_supported_sql_udt(udt_id)) { + // array type + field.type_.set_subschema_id(subschema_id); + field.charsetnr_ = CS_TYPE_BINARY; + field.length_ = OB_MAX_LONGTEXT_LENGTH; + } else if (OB_FAIL(result_set.get_exec_context().get_subschema_id_by_udt_id(udt_id, tmp_subschema_id))) { LOG_WARN("unsupported udt id", K(ret), K(subschema_id)); } else if (OB_FAIL(result_set.get_exec_context().get_sqludt_meta_by_subschema_id(tmp_subschema_id, udt_meta))) { LOG_WARN("failed to get udt meta", K(ret), K(tmp_subschema_id)); @@ -847,15 +853,15 @@ int ObSql::fill_select_result_set(ObResultSet &result_set, ObSqlCtx *context, co field.type_.set_subschema_id(tmp_subschema_id); field.charsetnr_ = CS_TYPE_BINARY; field.length_ = OB_MAX_LONGTEXT_LENGTH; + if (OB_SUCC(ret)) { + if (OB_FAIL(ob_write_string(alloc, ObString(udt_meta.udt_name_len_, udt_meta.udt_name_), field.type_name_))) { + LOG_WARN("fail to alloc string", K(i), K(field), K(ret)); + } + } } else { ret = OB_NOT_SUPPORTED; LOG_WARN("udt type not supported", K(ret), K(tmp_subschema_id)); } - if (OB_SUCC(ret)) { - if (OB_FAIL(ob_write_string(alloc, ObString(udt_meta.udt_name_len_, udt_meta.udt_name_), field.type_name_))) { - LOG_WARN("fail to alloc string", K(i), K(field), K(ret)); - } - } } else if (expr->get_result_type().is_ext() && OB_INVALID_ID != expr->get_result_type().get_udt_id() && (PL_VARRAY_TYPE == expr->get_result_type().get_extend_type() diff --git a/src/sql/ob_sql_context.cpp b/src/sql/ob_sql_context.cpp index 036f02edf3..c379f6997b 100644 --- a/src/sql/ob_sql_context.cpp 
+++ b/src/sql/ob_sql_context.cpp @@ -539,7 +539,8 @@ int ObSqlSchemaGuard::get_can_read_index_array(uint64_t table_id, bool with_mv, bool with_global_index, bool with_domain_index, - bool with_spatial_index) + bool with_spatial_index, + bool with_vector_index) { int ret = OB_SUCCESS; const uint64_t tenant_id = MTL_ID(); @@ -547,7 +548,7 @@ int ObSqlSchemaGuard::get_can_read_index_array(uint64_t table_id, OZ (schema_guard_->get_can_read_index_array(tenant_id, table_id, index_tid_array, size, with_mv, with_global_index, with_domain_index, - with_spatial_index)); + with_spatial_index, with_vector_index)); return ret; } diff --git a/src/sql/ob_sql_context.h b/src/sql/ob_sql_context.h index b4d8bd73c1..29516e2211 100644 --- a/src/sql/ob_sql_context.h +++ b/src/sql/ob_sql_context.h @@ -475,7 +475,8 @@ public: bool with_mv, bool with_global_index = true, bool with_domain_index = true, - bool with_spatial_index = true); + bool with_spatial_index = true, + bool with_vector_index = true); int get_table_mlog_schema(const uint64_t table_id, const ObTableSchema *&mlog_schema); int get_link_table_schema(uint64_t table_id, const share::schema::ObTableSchema *&table_schema) const; diff --git a/src/sql/ob_sql_define.h b/src/sql/ob_sql_define.h index 96cc79b51f..1c53c64dbc 100644 --- a/src/sql/ob_sql_define.h +++ b/src/sql/ob_sql_define.h @@ -690,7 +690,8 @@ static bool is_fixed_length(ObObjType type) { || ObGeometryTC == tc || ObUserDefinedSQLTC == tc || ObDecimalIntTC == tc - || ObRoaringBitmapTC == tc) { + || ObRoaringBitmapTC == tc + || ObCollectionSQLTC == tc) { is_fixed = false; } return is_fixed; diff --git a/src/sql/ob_sql_utils.cpp b/src/sql/ob_sql_utils.cpp index 49f70efa7a..2f6d19d354 100644 --- a/src/sql/ob_sql_utils.cpp +++ b/src/sql/ob_sql_utils.cpp @@ -810,6 +810,13 @@ int ObSQLUtils::se_calc_const_expr(ObSQLSessionInfo *session, if (NULL != out_ctx->get_original_package_guard()) { exec_ctx.set_package_guard(out_ctx->get_original_package_guard()); } + if (NULL != 
out_ctx->get_physical_plan_ctx()) { + ObSubSchemaCtx & subschema_ctx = out_ctx->get_physical_plan_ctx()->get_subschema_ctx(); + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(exec_ctx.get_physical_plan_ctx()->get_subschema_ctx().assgin(subschema_ctx))) { + LOG_WARN("failed to assgin subschema_ctx", K(tmp_ret)); + } + } } void *frame_buf = NULL; ObPreCalcExprFrameInfo *pre_calc_frame = NULL; diff --git a/src/sql/optimizer/ob_del_upd_log_plan.cpp b/src/sql/optimizer/ob_del_upd_log_plan.cpp index eada9ab4b5..118442bcae 100644 --- a/src/sql/optimizer/ob_del_upd_log_plan.cpp +++ b/src/sql/optimizer/ob_del_upd_log_plan.cpp @@ -1800,7 +1800,7 @@ int ObDelUpdLogPlan::collect_related_local_index_ids(IndexDMLInfo &primary_dml_i if (OB_FAIL(primary_dml_info.related_index_ids_.push_back(index_schema->get_table_id()))) { LOG_WARN("add related index ids failed", K(ret)); } - } else if (primary_dml_info.is_update_part_key_ && index_schema->is_fts_index()) { + } else if (primary_dml_info.is_update_part_key_ && (index_schema->is_fts_index() || index_schema->is_vec_index())) { // If part key is updated and it is fts index, need to be added into the related index ids. 
if (OB_FAIL(primary_dml_info.related_index_ids_.push_back(index_schema->get_table_id()))) { LOG_WARN("add related index ids failed", K(ret)); diff --git a/src/sql/optimizer/ob_index_info_cache.h b/src/sql/optimizer/ob_index_info_cache.h index a51ab6dd46..661ce18889 100644 --- a/src/sql/optimizer/ob_index_info_cache.h +++ b/src/sql/optimizer/ob_index_info_cache.h @@ -141,6 +141,7 @@ public: is_geo_index_(false), is_fulltext_index_(false), is_multivalue_index_(false), + is_vector_index_(false), range_info_(), ordering_info_(), interesting_order_info_(OrderingFlag::NOT_MATCH), @@ -176,6 +177,8 @@ public: void set_is_index_geo(const bool is_index_geo) { is_geo_index_ = is_index_geo; } bool is_fulltext_index() const { return is_fulltext_index_; } void set_is_fulltext_index(const bool is_fulltext_index) { is_fulltext_index_ = is_fulltext_index; } + bool is_vector_index() const { return is_vector_index_; } + void set_is_vector_index(const bool is_vector_index) { is_vector_index_ = is_vector_index; } void set_partition_info(ObTablePartitionInfo *partition_info) { partition_info_ = partition_info; } ObTablePartitionInfo *get_partition_info() const { return partition_info_; } void set_sharding_info(ObShardingInfo *sharding_info) { sharding_info_ = sharding_info; } @@ -193,6 +196,7 @@ private: bool is_geo_index_; bool is_fulltext_index_; bool is_multivalue_index_; + bool is_vector_index_; QueryRangeInfo range_info_; OrderingInfo ordering_info_; int64_t interesting_order_info_; // 记录索引的序在stmt中的哪些地方用到 e.g. 
join, group by, order by diff --git a/src/sql/optimizer/ob_insert_log_plan.cpp b/src/sql/optimizer/ob_insert_log_plan.cpp index 4ee1c71bbb..5af72aa105 100644 --- a/src/sql/optimizer/ob_insert_log_plan.cpp +++ b/src/sql/optimizer/ob_insert_log_plan.cpp @@ -1463,13 +1463,19 @@ int ObInsertLogPlan::prepare_table_dml_info_for_ddl(const ObInsertTableInfo& tab //@TODO: 后续@yibo, @cangdi会重构create local index的处理 index_dml_info->ref_table_id_ = table_item->ddl_table_id_; } - + bool need_all_part = index_schema->is_index_table() && !index_schema->is_global_index_table() && data_table_schema->is_heap_table(); + if (optimizer_context_.is_online_ddl()) { + need_all_part = need_all_part || (index_schema->is_partitioned_table() && + (index_schema->is_vec_delta_buffer_type() || + index_schema->is_vec_index_id_type() || + index_schema->is_vec_index_snapshot_data_type())); + } if (OB_SUCC(ret)) { if (OB_FAIL(get_all_rowkey_columns_for_ddl(table_info, index_schema, index_dml_info->column_exprs_))) { LOG_WARN("failed to get all rowkey columns for ddl" , K(ret)); } else if (OB_FAIL(get_all_columns_for_ddl(table_info, index_schema, index_dml_info->column_exprs_))) { LOG_WARN("failed to get all columns for ddl" , K(ret)); - } else if (index_schema->is_index_table() && !index_schema->is_global_index_table() && data_table_schema->is_heap_table() && + } else if (need_all_part && OB_FAIL(get_all_part_columns_for_ddl(table_info, data_table_schema, index_dml_info->column_exprs_))) { LOG_WARN("failed to get all part columns for ddl" , K(ret)); } else { diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp index 516d223b08..7ad1022872 100644 --- a/src/sql/optimizer/ob_join_order.cpp +++ b/src/sql/optimizer/ob_join_order.cpp @@ -31,6 +31,7 @@ #include "sql/optimizer/ob_opt_selectivity.h" #include "share/stat/ob_opt_stat_manager.h" #include "sql/rewrite/ob_predicate_deduce.h" +#include "share/vector_index/ob_vector_index_util.h" using namespace oceanbase; using 
namespace sql; using namespace oceanbase::common; @@ -1762,6 +1763,7 @@ int ObJoinOrder::create_one_access_path(const uint64_t table_id, ap->est_cost_info_.index_meta_info_.is_geo_index_ = index_info_entry->is_index_geo(); ap->est_cost_info_.index_meta_info_.is_multivalue_index_ = index_info_entry->is_multivalue_index(); ap->est_cost_info_.index_meta_info_.is_fulltext_index_ = index_info_entry->is_fulltext_index(); + ap->est_cost_info_.index_meta_info_.is_vector_index_ = index_info_entry->is_vector_index(); ap->est_cost_info_.is_virtual_table_ = is_virtual_table(ref_id); ap->est_cost_info_.table_metas_ = &get_plan()->get_basic_table_metas(); ap->est_cost_info_.sel_ctx_ = &get_plan()->get_selectivity_ctx(); @@ -2642,6 +2644,7 @@ int ObJoinOrder::fill_index_info_entry(const uint64_t table_id, entry->set_is_unique_index(is_unique_index); entry->set_is_fulltext_index(index_schema->is_fts_index()); entry->set_is_multivalue_index(index_schema->is_multivalue_index_aux()); + entry->set_is_vector_index(index_schema->is_vec_index()); entry->get_ordering_info().set_scan_direction(direction); } if (OB_SUCC(ret)) { @@ -3019,6 +3022,11 @@ int ObJoinOrder::get_valid_index_ids(const uint64_t table_id, int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1; const LogTableHint *log_table_hint = NULL; ObMatchFunRawExpr *match_expr = NULL; + ObRawExpr *vector_expr = NULL; + const ObSelectStmt *select_stmt = NULL; + bool has_aggr = false; // defend aggr for ann search + bool is_vec_index_hint = false; + bool vector_index_match = false; if (OB_ISNULL(get_plan()) || OB_ISNULL(stmt = get_plan()->get_stmt()) || OB_ISNULL(schema_guard = OPT_CTX.get_sql_schema_guard()) || @@ -3042,6 +3050,14 @@ int ObJoinOrder::get_valid_index_ids(const uint64_t table_id, } else if (OB_FAIL(valid_index_ids.push_back(inv_idx_tid))) { LOG_WARN("failed to assign index ids", K(ret)); } + } else if (stmt->is_select_stmt() && FALSE_IT(select_stmt = static_cast(stmt))) { + } else if (nullptr != select_stmt && 
FALSE_IT(has_aggr = select_stmt->get_aggr_item_size() > 0)) { + } else if (stmt->has_vec_approx() + && OB_NOT_NULL(vector_expr = stmt->get_first_vector_expr()) + && OB_FAIL(get_vector_inv_index_tid(schema_guard, vector_expr, table_id, ref_table_id, has_aggr, vector_index_match, valid_index_ids))) { + LOG_WARN("failed to get matched vector index table id", K(ret)); + } else if (vector_index_match) { + // do nothing } else if (table_item->is_index_table_) { if (OB_FAIL(valid_index_ids.push_back(table_item->ref_id_))) { LOG_WARN("failed to push back array", K(ret)); @@ -3058,7 +3074,8 @@ int ObJoinOrder::get_valid_index_ids(const uint64_t table_id, false, table_item->access_all_part(), false /*domain index*/, - false /*spatial index*/))) { + false /*spatial index*/, + false /*vector index*/))) { LOG_WARN("failed to get can read index", K(ref_table_id), K(ret)); } else if (index_count > OB_MAX_INDEX_PER_TABLE + 1) { ret = OB_ERR_UNEXPECTED; @@ -12516,9 +12533,7 @@ int ObJoinOrder::fill_filters(const ObIArray &all_filters, } // 对于空间索引,空间谓词一定要回表计算 if (OB_SUCC(ret) && - (est_cost_info.index_meta_info_.is_geo_index_ || - est_cost_info.index_meta_info_.is_fulltext_index_ || - est_cost_info.index_meta_info_.is_multivalue_index_)) { + est_cost_info.index_meta_info_.is_domain_index()) { ret = est_cost_info.table_filters_.push_back(filter); } } else { @@ -16300,6 +16315,76 @@ int ObJoinOrder::param_values_table_expr(ObIArray &values_vector, return ret; } +int ObJoinOrder::get_vector_inv_index_tid(ObSqlSchemaGuard *schema_guard, + ObRawExpr *vector_expr, + const uint64_t table_id, + const uint64_t ref_table_id, + const bool has_aggr, + bool &vector_index_match, + ObIArray &valid_index_ids) +{ + int ret = OB_SUCCESS; + ObSQLSessionInfo *session_info = NULL; + const ObTableSchema *table_schema = NULL; + uint64_t inv_idx_tid = OB_INVALID_ID; + vector_index_match = false; + if (OB_ISNULL(vector_expr) || OB_ISNULL(schema_guard) || + OB_ISNULL(session_info = 
OPT_CTX.get_session_info()) || + OB_ISNULL(schema_guard->get_schema_guard())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(ref_table_id, table_schema))) { + LOG_WARN("failed to get main table schema", K(ret)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + uint64_t vec_col_id = OB_INVALID_ID; + bool column_exist = false; + for (int i = 0; i < vector_expr->get_param_count() && OB_SUCC(ret) && !vector_index_match; ++i) { + const ObRawExpr *tmp_expr = vector_expr->get_param_expr(i); + const ObColumnSchemaV2 *tmp_index_col = nullptr; + if (OB_NOT_NULL(tmp_expr) && tmp_expr->is_column_ref_expr()) { + column_exist = true; + const ObColumnRefRawExpr *col_ref = ObRawExprUtils::get_column_ref_expr_recursively(tmp_expr); + if (col_ref->get_table_id() == table_id + && OB_NOT_NULL(tmp_index_col = table_schema->get_column_schema(col_ref->get_column_id()))) { + if (OB_FAIL(ObVectorIndexUtil::check_column_has_vector_index(*table_schema, *(schema_guard->get_schema_guard()), tmp_index_col->get_column_id(), vector_index_match))) { + LOG_WARN("failed to check column has vector index", K(ret), K(tmp_index_col->get_column_id()), K(vector_index_match)); + } else if (vector_index_match) { + vec_col_id = tmp_index_col->get_column_id(); + } else { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "should be vector column with vector index"); + } + } + } + } + + if (OB_FAIL(ret)) { + } else if (!column_exist) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "should be vector column with vector index"); + } else if (vector_index_match && has_aggr) { + vector_index_match = false; + } else if (!vector_index_match) { + } else if (OB_FAIL(ObVectorIndexUtil::get_vector_index_tid(schema_guard->get_schema_guard(), + *table_schema, + INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL, + vec_col_id, + inv_idx_tid))) { + LOG_WARN("fail to get spec 
vector delta buffer table id", K(ret), K(vec_col_id), KPC(table_schema)); + } else if (inv_idx_tid == OB_INVALID_ID) { + ret = OB_NOT_SUPPORTED; + LOG_INFO("can not find vector index for spec col id", K(ref_table_id), K(vec_col_id)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "should be vector column with vector index"); + } else if (OB_FAIL(valid_index_ids.push_back(inv_idx_tid))) { + LOG_WARN("failed to assign index ids", K(ret)); + } + } + return ret; +} + int ObJoinOrder::get_matched_inv_index_tid(ObMatchFunRawExpr *match_expr, uint64_t ref_table_id, uint64_t &inv_idx_tid) diff --git a/src/sql/optimizer/ob_join_order.h b/src/sql/optimizer/ob_join_order.h index a7adfa22c7..eec9044e5b 100644 --- a/src/sql/optimizer/ob_join_order.h +++ b/src/sql/optimizer/ob_join_order.h @@ -1347,6 +1347,14 @@ struct NullAwareAntiJoinInfo { uint64_t ref_table_id, uint64_t &inv_idx_tid); + int get_vector_inv_index_tid(ObSqlSchemaGuard *schema_guard, + ObRawExpr *vector_expr, + const uint64_t table_id, + const uint64_t ref_table_id, + const bool has_aggr, + bool &vector_index_match, + ObIArray &valid_index_ids); + inline ObTablePartitionInfo *get_table_partition_info() { return table_partition_info_; } int param_funct_table_expr(ObRawExpr* &function_table_expr, diff --git a/src/sql/optimizer/ob_log_del_upd.cpp b/src/sql/optimizer/ob_log_del_upd.cpp index 9cf6fd07bf..c59ef56c9a 100644 --- a/src/sql/optimizer/ob_log_del_upd.cpp +++ b/src/sql/optimizer/ob_log_del_upd.cpp @@ -1624,8 +1624,14 @@ int ObLogDelUpd::replace_dml_info_exprs( } else if (NULL != index_dml_info->new_rowid_expr_ && OB_FAIL(replace_expr_action(replacer, index_dml_info->new_rowid_expr_))) { LOG_WARN("failed to replace new rowid expr", K(ret)); - } else if (OB_FAIL(replace_exprs_action(replacer, index_dml_info->column_old_values_exprs_))) { - LOG_WARN("failed to replace column old values exprs ", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < index_dml_info->column_old_values_exprs_.count(); ++i) { + ObRawExpr *&expr 
= index_dml_info->column_old_values_exprs_.at(i); + if (expr->is_column_ref_expr() && static_cast(expr)->is_vec_vid_column()) { + // just skip, nothing to do. + } else if (OB_FAIL(replace_expr_action(replacer, index_dml_info->column_old_values_exprs_.at(i)))) { + LOG_WARN("fail to replace expr", K(ret), K(i), K(index_dml_info->column_old_values_exprs_)); + } } for (int64_t i = 0; OB_SUCC(ret) && i < index_dml_info->assignments_.count(); ++i) { if (OB_FAIL(replace_expr_action(replacer, index_dml_info->assignments_.at(i).expr_))) { diff --git a/src/sql/optimizer/ob_log_plan.cpp b/src/sql/optimizer/ob_log_plan.cpp index 19e49531f7..4e81938fbc 100644 --- a/src/sql/optimizer/ob_log_plan.cpp +++ b/src/sql/optimizer/ob_log_plan.cpp @@ -78,6 +78,7 @@ #include "sql/spm/ob_spm_define.h" #endif #include "sql/optimizer/ob_log_values_table_access.h" +#include "share/vector_index/ob_vector_index_util.h" using namespace oceanbase; using namespace sql; @@ -2895,6 +2896,9 @@ int ObLogPlan::allocate_access_path(AccessPath *ap, } else if (ap->est_cost_info_.index_meta_info_.is_multivalue_index_ && OB_FAIL(prepare_multivalue_retrieval_scan(scan))) { LOG_WARN("failed to prepare multivalue doc_rowkey ", K(ret)); + } else if (ap->est_cost_info_.index_meta_info_.is_vector_index_ && get_stmt()->has_vec_approx() && + OB_FAIL(prepare_vector_index_info(scan))) { + LOG_WARN("failed to prepare multivalue doc_rowkey ", K(ret)); } } @@ -7003,14 +7007,14 @@ int ObLogPlan::allocate_sort_and_exchange_as_top(ObLogicalOperator *&top, if (OB_FAIL(ret)) { // do nothing } else if (OB_SUCC(ret) && NULL != topn_expr && need_sort && - OB_FAIL(try_push_topn_into_text_retrieval_scan(top, - topn_expr, - get_stmt()->get_limit_expr(), - get_stmt()->get_offset_expr(), - is_fetch_with_ties, - exch_info.need_exchange(), - sort_keys, - need_further_sort))) { + OB_FAIL(try_push_topn_into_domain_scan(top, + topn_expr, + get_stmt()->get_limit_expr(), + get_stmt()->get_offset_expr(), + is_fetch_with_ties, + 
exch_info.need_exchange(), + sort_keys, + need_further_sort))) { LOG_WARN("failed to push topn into text retrieval scan", K(ret)); } else if (!need_further_sort) { // do nothing @@ -7436,7 +7440,7 @@ int ObLogPlan::try_push_limit_into_table_scan(ObLogicalOperator *top, !(table_scan->get_is_index_global() && table_scan->get_index_back() && table_scan->has_index_lookup_filter()) && (NULL == table_scan->get_limit_expr() || ObOptimizerUtil::is_point_based_sub_expr(limit_expr, table_scan->get_limit_expr())) && - table_scan->get_text_retrieval_info().topk_limit_expr_ == NULL) { + (table_scan->get_text_retrieval_info().topk_limit_expr_ == NULL || table_scan->get_vector_index_info().topk_limit_expr_ == NULL)) { bool das_multi_partition = false; if (table_scan->use_das() && NULL != table_scan->get_table_partition_info()) { int64_t partition_count = table_scan->get_table_partition_info()-> @@ -7467,6 +7471,8 @@ int ObLogPlan::try_push_limit_into_table_scan(ObLogicalOperator *top, } } else if (OB_NOT_NULL(table_scan->get_text_retrieval_info().topk_limit_expr_)) { is_pushed = true; + } else if (OB_NOT_NULL(table_scan->get_vector_index_info().topk_limit_expr_)) { + is_pushed = true; } } else { /*do nothing*/ } return ret; @@ -10691,6 +10697,8 @@ int ObLogPlan::do_post_plan_processing() LOG_WARN("failed to set duplicated table location", K(ret)); } else if (OB_FAIL(set_advisor_table_id(root))) { LOG_WARN("failed to set advise table id from duplicate table", K(ret)); + } else if (OB_FAIL(check_das_need_scan_with_vid(root))) { + LOG_WARN("failed to check das need scan with vid", K(ret)); } else if (OB_FAIL(collect_table_location(root))) { LOG_WARN("failed to collect table location", K(ret)); } else if (OB_FAIL(build_location_related_tablet_ids())) { @@ -11252,6 +11260,24 @@ int ObLogPlan::collect_location_related_info(ObLogicalOperator &op) } } + if (OB_SUCC(ret) && tsc_op.is_vec_idx_scan()) { + if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, 
tsc_op.get_vector_index_info().delta_buffer_tid_))) { + LOG_WARN("failed to append index id table id", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, tsc_op.get_vector_index_info().index_id_tid_))) { + LOG_WARN("failed to append index id table id", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, tsc_op.get_vector_index_info().index_snapshot_data_tid_))) { + LOG_WARN("failed to append index_snapshot_data_tid", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, tsc_op.get_real_ref_table_id()))) { + LOG_WARN("failed to append main table id", K(ret)); + } + } + + if (OB_SUCC(ret) && tsc_op.is_tsc_with_vid()) { + if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, tsc_op.get_rowkey_vid_table_id()))) { + LOG_WARN("fail to store rowkey doc table id", K(ret)); + } + } + if (OB_SUCC(ret) && OB_FAIL(optimizer_context_.get_loc_rel_infos().push_back(rel_info))) { LOG_WARN("store location related info failed", K(ret)); } @@ -11408,6 +11434,26 @@ int ObLogPlan::check_das_need_keep_ordering(ObLogicalOperator *op) return ret; } +int ObLogPlan::check_das_need_scan_with_vid(ObLogicalOperator *op) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(op)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null param", K(ret)); + } else if (log_op_def::LOG_TABLE_SCAN == op->get_type()) { + ObLogTableScan *scan = static_cast(op); + if (OB_FAIL(scan->check_das_need_scan_with_vid())) { + LOG_WARN("failed to check das scan with doc id", K(ret)); + } + } + for (int i = 0; OB_SUCC(ret) && i < op->get_num_of_child(); ++i) { + if (OB_FAIL(SMART_CALL(check_das_need_scan_with_vid(op->get_child(i))))) { + LOG_WARN("failed to check das need scan with doc id", K(ret)); + } + } + return ret; +} + int ObLogPlan::calc_plan_resource() { int ret = OB_SUCCESS; @@ -13980,6 +14026,98 @@ int ObLogPlan::prepare_text_retrieval_scan(const ObIArray &exprs, O return ret; } +int 
ObLogPlan::prepare_vector_index_info(ObLogicalOperator *scan) +{ + int ret = OB_SUCCESS; + ObLogTableScan *table_scan = static_cast(scan); + ObSchemaGetterGuard *schema_guard = nullptr; + ObSQLSessionInfo *session = nullptr; + const ObTableSchema *table_schema = nullptr; + const ObDMLStmt *stmt = get_stmt(); + if (OB_ISNULL(stmt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect null stmt", K(ret)); + } else if (OB_ISNULL(schema_guard = get_optimizer_context().get_schema_guard()) + || OB_ISNULL(session = get_optimizer_context().get_session_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointers", K(ret), KP(get_stmt()), KP(schema_guard), KP(session)); + } else if (OB_FAIL(schema_guard->get_table_schema( + session->get_effective_tenant_id(), table_scan->get_real_ref_table_id(), table_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null table schema", K(ret)); + } else { + bool is_correct_table = false; + uint64_t vec_col_id = OB_INVALID_ID; + ObRawExpr *vector_expr = stmt->get_first_vector_expr(); + if (OB_ISNULL(vector_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else { + bool col_has_vec_idx = false; + for (int i = 0; i < vector_expr->get_param_count() && OB_SUCC(ret) && vec_col_id == OB_INVALID_ID; ++i) { + const ObRawExpr *tmp_expr = vector_expr->get_param_expr(i); + const ObColumnSchemaV2 *tmp_index_col = nullptr; + if (OB_NOT_NULL(tmp_expr) && tmp_expr->has_flag(CNT_COLUMN)) { + const ObColumnRefRawExpr *col_ref = ObRawExprUtils::get_column_ref_expr_recursively(tmp_expr); + if (col_ref->get_table_id() == table_scan->get_table_id()) { + is_correct_table = true; + if (OB_NOT_NULL(tmp_index_col = table_schema->get_column_schema(col_ref->get_column_id()))) { + if (OB_FAIL(ObVectorIndexUtil::check_column_has_vector_index(*table_schema, *schema_guard, tmp_index_col->get_column_id(), col_has_vec_idx))) { + 
LOG_WARN("failed to check column has vector index", K(ret), K(tmp_index_col->get_column_id()), K(col_has_vec_idx)); + } else if (col_has_vec_idx) { + vec_col_id = tmp_index_col->get_column_id(); + } + } + } + } + } + if (OB_SUCC(ret) && vec_col_id == OB_INVALID_ID && is_correct_table) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to find spec vec col id", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (!is_correct_table) { + // do nothing + } else { + // 通过主表schema获取相关需要的所有index表的信息 + ObVectorIndexInfo &vc_info = table_scan->get_vector_index_info(); + uint64_t vec_id_rowkey_tid = OB_INVALID_ID; + if (OB_FAIL(ObVectorIndexUtil::get_vector_index_tid(schema_guard, + *table_schema, + INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL, + vec_col_id, + vc_info.delta_buffer_tid_))) { + LOG_WARN("fail to get spec vector delta buffer table id", K(ret), K(vec_col_id), KPC(table_schema)); + } else if (OB_FAIL(ObVectorIndexUtil::get_vector_index_tid(schema_guard, + *table_schema, + INDEX_TYPE_VEC_INDEX_ID_LOCAL, + vec_col_id, + vc_info.index_id_tid_))) { + LOG_WARN("fail to get spec vector index id table id", K(ret), K(vec_col_id), KPC(table_schema)); + } else if (OB_FAIL(ObVectorIndexUtil::get_vector_index_tid(schema_guard, + *table_schema, + INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL, + vec_col_id, + vc_info.index_snapshot_data_tid_))) { + LOG_WARN("fail to get spec vector index snapshot data table id", K(ret), K(vec_col_id), KPC(table_schema)); + } else if (OB_FAIL(table_schema->get_vec_id_rowkey_tid(vec_id_rowkey_tid))) { + LOG_WARN("failed to get doc_id_rowkey table id", K(ret)); + } else { + vc_info.main_table_tid_ = table_scan->get_real_ref_table_id(); + vc_info.sort_key_.expr_ = vector_expr; + vc_info.topk_limit_expr_ = stmt->get_limit_expr(); + vc_info.topk_offset_expr_ = stmt->get_offset_expr(); + table_scan->set_doc_id_index_table_id(vec_id_rowkey_tid); + table_scan->set_index_back(true); + } + } + } + return ret; +} + int 
ObLogPlan::prepare_multivalue_retrieval_scan(ObLogicalOperator *scan) { int ret = OB_SUCCESS; @@ -14008,6 +14146,94 @@ int ObLogPlan::prepare_multivalue_retrieval_scan(ObLogicalOperator *scan) return ret; } +int ObLogPlan::try_push_topn_into_domain_scan(ObLogicalOperator *&top, + ObRawExpr *topn_expr, + ObRawExpr *limit_expr, + ObRawExpr *offset_expr, + bool is_fetch_with_ties, + bool need_exchange, + const ObIArray &sort_keys, + bool &need_further_sort) +{ + int ret = OB_SUCCESS; + ObLogTableScan *table_scan = NULL; + if (OB_ISNULL(top)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(top), K(limit_expr), K(get_stmt()), K(ret)); + } else if (log_op_def::LOG_TABLE_SCAN != top->get_type()) { + // do nothing + } else if (OB_FALSE_IT(table_scan = static_cast(top))) { + } else if (table_scan->is_text_retrieval_scan()) { + if (OB_FAIL(try_push_topn_into_text_retrieval_scan(top, + topn_expr, + get_stmt()->get_limit_expr(), + get_stmt()->get_offset_expr(), + is_fetch_with_ties, + need_exchange, + sort_keys, + need_further_sort))) { + LOG_WARN("failed to push topn into text retrieval scan", K(ret)); + } + } else if (table_scan->is_vec_idx_scan()) { + if (OB_FAIL(try_push_topn_into_vector_index_scan(top, + topn_expr, + get_stmt()->get_limit_expr(), + get_stmt()->get_offset_expr(), + is_fetch_with_ties, + need_exchange, + sort_keys, + need_further_sort))) { + LOG_WARN("failed to push topn into vector index scan", K(ret)); + } + } // if not full tex or vector index, do noting + return ret; +} + +int ObLogPlan::try_push_topn_into_vector_index_scan(ObLogicalOperator *&top, + ObRawExpr *topn_expr, + ObRawExpr *limit_expr, + ObRawExpr *offset_expr, + bool is_fetch_with_ties, + bool need_exchange, + const ObIArray &sort_keys, + bool &need_further_sort) +{ + int ret = OB_SUCCESS; + need_further_sort = true; + ObLogTableScan *table_scan = NULL; + bool has_multi_sort_keys = false; + ObRawExpr *pushed_limit_expr = NULL; + ObRawExpr *pushed_offset_expr = NULL; + if 
(OB_ISNULL(top) || OB_ISNULL(get_stmt()) || sort_keys.count() == 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(top), K(limit_expr), K(get_stmt()), K(ret)); + } else if (log_op_def::LOG_TABLE_SCAN != top->get_type()) { + // do nothing + } else if (OB_FALSE_IT(table_scan = static_cast(top))) { + } else if (table_scan->get_filter_exprs().count() != 0 || + table_scan->get_pushdown_filter_exprs().count() != 0) { + // do nothing, topn pushdown requires that only match filter exists on the base table. + } else { + // get some topk, limit, sort expr and set to vector index op + has_multi_sort_keys = sort_keys.count() == 1 ? false : true; + need_further_sort = (has_multi_sort_keys || table_scan->use_das() || need_exchange) && OB_NOT_NULL(topn_expr); + pushed_limit_expr = need_further_sort ? topn_expr : limit_expr; + pushed_offset_expr = need_further_sort ? NULL : offset_expr; + ObSEArray tmp_sort_keys; + table_scan->get_vector_index_info().sort_key_.order_type_ = sort_keys.at(0).order_type_; + if (OB_FAIL(tmp_sort_keys.push_back(sort_keys.at(0)))) { + LOG_WARN("failed to push back order item", K(ret)); + } else if (OB_FAIL(table_scan->set_op_ordering(tmp_sort_keys))) { + LOG_WARN("failed to set op ordering", K(ret)); + } else { + // check if single partion or non-partition, maybe need more check + // 通过need_further_sort控制是否还需要在外面添加topn算子 + need_further_sort = table_scan->get_table_partition_info()->get_table_location().is_partitioned(); + } + } + return ret; +} + int ObLogPlan::try_push_topn_into_text_retrieval_scan(ObLogicalOperator *&top, ObRawExpr *topn_expr, ObRawExpr *limit_expr, diff --git a/src/sql/optimizer/ob_log_plan.h b/src/sql/optimizer/ob_log_plan.h index bcb7f820dd..5a02fd4af6 100644 --- a/src/sql/optimizer/ob_log_plan.h +++ b/src/sql/optimizer/ob_log_plan.h @@ -264,6 +264,7 @@ public: int collect_location_related_info(ObLogicalOperator &op); int build_location_related_tablet_ids(); int check_das_need_keep_ordering(ObLogicalOperator *op); 
+ int check_das_need_scan_with_vid(ObLogicalOperator *op); int gen_das_table_location_info(ObLogTableScan *table_scan, ObTablePartitionInfo *&table_partition_info); @@ -1423,8 +1424,25 @@ public: int construct_startup_filter_for_limit(ObRawExpr *limit_expr, ObLogicalOperator *log_op); + int prepare_vector_index_info(ObLogicalOperator *scan); int prepare_text_retrieval_scan(const ObIArray &exprs, ObLogicalOperator *scan); int prepare_multivalue_retrieval_scan(ObLogicalOperator *scan); + int try_push_topn_into_domain_scan(ObLogicalOperator *&top, + ObRawExpr *topn_expr, + ObRawExpr *limit_expr, + ObRawExpr *offset_expr, + bool is_fetch_with_ties, + bool need_exchange, + const ObIArray &sort_keys, + bool &need_further_sort); + int try_push_topn_into_vector_index_scan(ObLogicalOperator *&top, + ObRawExpr *topn_expr, + ObRawExpr *limit_expr, + ObRawExpr *offset_expr, + bool is_fetch_with_ties, + bool need_exchange, + const ObIArray &sort_keys, + bool &need_further_sort); int try_push_topn_into_text_retrieval_scan(ObLogicalOperator *&top, ObRawExpr *topn_expr, ObRawExpr *limit_expr, diff --git a/src/sql/optimizer/ob_log_table_scan.cpp b/src/sql/optimizer/ob_log_table_scan.cpp index 57cf5dacad..2219be2ec7 100644 --- a/src/sql/optimizer/ob_log_table_scan.cpp +++ b/src/sql/optimizer/ob_log_table_scan.cpp @@ -26,6 +26,7 @@ #include "sql/optimizer/ob_join_order.h" #include "sql/optimizer/ob_log_join.h" #include "sql/dblink/ob_dblink_utils.h" +#include "share/vector_index/ob_vector_index_util.h" using namespace oceanbase::sql; using namespace oceanbase::common; @@ -199,6 +200,10 @@ int ObLogTableScan::get_op_exprs(ObIArray &all_exprs) LOG_WARN("failed to push back expr", K(ret)); } else if (is_text_retrieval_scan() && OB_FAIL(get_text_retrieval_calc_exprs(all_exprs))) { LOG_WARN("failed to get text retrieval exprs", K(ret)); + } else if (is_vec_idx_scan() && OB_FAIL(get_vec_idx_calc_exprs(all_exprs))) { + LOG_WARN("failed to get text retrieval exprs", K(ret)); + } else if 
(OB_FAIL(append(all_exprs, rowkey_vid_exprs_))) { + LOG_WARN("failed to append rowkey doc exprs", K(ret)); } else if (OB_FAIL(append(all_exprs, access_exprs_))) { LOG_WARN("failed to append exprs", K(ret)); } else if (OB_FAIL(append(all_exprs, pushdown_aggr_exprs_))) { @@ -234,6 +239,15 @@ int ObLogTableScan::allocate_expr_post(ObAllocExprContext &ctx) LOG_WARN("failed to mark expr as produced", K(*expr), K(branch_id_), K(id_), K(ret)); } else { /*do nothing*/ } } + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_vid_exprs_.count(); ++i) { + ObRawExpr *expr = rowkey_vid_exprs_.at(i); + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null expr", K(ret)); + } else if (OB_FAIL(mark_expr_produced(expr, branch_id_, id_, ctx))) { + LOG_WARN("failed to mark expr as produced", K(*expr), K(branch_id_), K(id_), K(ret)); + } + } if (OB_SUCC(ret) && is_text_retrieval_scan()) { // match against relevance expr will be calculated in storage ObSEArray tmp_exprs; @@ -450,6 +464,10 @@ int ObLogTableScan::generate_access_exprs() LOG_WARN("failed to copy filter before index back", K(ret)); } else if (is_text_retrieval_scan() && OB_FAIL(prepare_text_retrieval_dep_exprs())) { LOG_WARN("failed to copy text retrieval aggr exprs", K(ret)); + } else if (is_vec_idx_scan() && OB_FAIL(prepare_vector_access_exprs())) { + LOG_WARN("failed to copy vec idx scan exprs", K(ret)); + } else if (is_tsc_with_vid() && OB_FAIL(prepare_rowkey_vid_dep_exprs())) { + LOG_WARN("failed to prepare table scan with vec vid info", K(ret)); } else if (OB_FAIL(generate_necessary_rowkey_and_partkey_exprs())) { LOG_WARN("failed to generate rowkey and part exprs", K(ret)); } else if (OB_FAIL(allocate_group_id_expr())) { @@ -956,11 +974,13 @@ int ObLogTableScan::generate_necessary_rowkey_and_partkey_exprs() LOG_WARN("failed to check whether stmt has lob column", K(ret)); } else if (OB_FAIL(get_mbr_column_exprs(table_id_, spatial_exprs_))) { LOG_WARN("failed to check whether stmt has mbr column", K(ret)); - } 
else if (need_doc_id_index_back() && OB_FAIL(extract_doc_id_index_back_expr(domain_exprs_))) { + } else if (need_doc_id_index_back() && OB_FAIL(extract_doc_id_index_back_expr(domain_exprs_, is_vec_idx_scan()))) { LOG_WARN("failed to extract doc id index back exprs", K(ret)); } else if (is_text_retrieval_scan() && OB_FAIL(extract_text_retrieval_access_expr(domain_exprs_))) { LOG_WARN("failed to extract text retrieval access exprs", K(ret)); - } else if (is_heap_table && is_index_global_ && index_back_ && + } else if (is_vec_idx_scan() && OB_FAIL(extract_vec_idx_access_expr(domain_exprs_))) { + LOG_WARN("failed to extract vector index access exprs", K(ret)); + }else if (is_heap_table && is_index_global_ && index_back_ && OB_FAIL(get_part_column_exprs(table_id_, ref_table_id_, part_exprs_))) { LOG_WARN("failed to get part column exprs", K(ret)); } else if ((has_lob_column || index_back_) && @@ -1401,6 +1421,14 @@ int ObLogTableScan::get_plan_item_info(PlanText &plan_text, LOG_WARN("BUF_PRINTF fails", K(ret)); } else { /* Do nothing */ } + if (OB_SUCC(ret) && is_tsc_with_vid_) { + if (is_tsc_with_vid_ && OB_FAIL(BUF_PRINTF(", "))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (is_tsc_with_vid_ && OB_FAIL(BUF_PRINTF("with_vid=%s", is_tsc_with_vid_ ?"true" : "false"))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } + } + if (OB_SUCC(ret) && (0 != filter_before_index_back_.count())) { if (OB_FAIL(BUF_PRINTF(", "))) { LOG_WARN("BUF_PRINTF fails", K(ret)); @@ -2307,7 +2335,7 @@ ObRawExpr * ObLogTableScan::get_real_expr(const ObRawExpr *col) const return ret; } -int ObLogTableScan::extract_doc_id_index_back_expr(ObIArray &exprs) +int ObLogTableScan::extract_doc_id_index_back_expr(ObIArray &exprs, bool is_vec_scan) { int ret = OB_SUCCESS; uint64_t doc_id_rowkey_tid = OB_INVALID_ID; @@ -2330,7 +2358,7 @@ int ObLogTableScan::extract_doc_id_index_back_expr(ObIArray &exprs) if (OB_ISNULL(col_schema = table_schema->get_column_schema_by_idx(i))) { ret = OB_ERR_UNEXPECTED; 
LOG_WARN("failed to get column schema by index", K(ret)); - } else if (col_schema->is_doc_id_column()) { + } else if ((!is_vec_scan && col_schema->is_doc_id_column()) || (is_vec_scan && col_schema->is_vec_vid_column())) { doc_id_col_schema = col_schema; break; } @@ -2389,6 +2417,81 @@ int ObLogTableScan::extract_text_retrieval_access_expr(ObIArray &ex return ret; } +int ObLogTableScan::extract_vec_idx_access_expr(ObIArray &exprs) +{ + int ret = OB_SUCCESS; + ObVectorIndexInfo &vec_info = get_vector_index_info(); + ObSqlSchemaGuard *schema_guard = nullptr; + const ObTableSchema *table_schema = nullptr; + ObSEArray col_items; + if (OB_ISNULL(get_stmt()) || OB_ISNULL(get_plan()) || + OB_ISNULL(schema_guard = get_plan()->get_optimizer_context().get_sql_schema_guard())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret), KP(get_stmt()), KP(get_plan()), KP(schema_guard)); + } else if (OB_FAIL(schema_guard->get_table_schema(ref_table_id_, table_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_FAIL(exprs.push_back(vec_info.vec_id_column_))) { + LOG_WARN("failed to append token column to access exprs", K(ret)); + } else if (OB_FAIL(get_stmt()->get_column_items(table_id_, col_items))) { + LOG_WARN("failed to get column items", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < col_items.count(); ++i) { + const ColumnItem &col_item = col_items.at(i); + bool is_rowkey = false; + if (OB_FAIL(table_schema->get_rowkey_info().is_rowkey_column(col_item.column_id_, is_rowkey))) { + LOG_WARN("failed to check if column item is rowkey", K(ret)); + } else if (is_rowkey) { + exprs.push_back(col_item.expr_); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(exprs.push_back(vec_info.vec_id_column_))) { + LOG_WARN("failed to append vid column to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(vec_info.delta_vid_column_))) { + LOG_WARN("failed to append token delta_vid column to access exprs", K(ret)); + } else if 
(OB_FAIL(exprs.push_back(vec_info.delta_type_column_))) { + LOG_WARN("failed to append delta_type column to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(vec_info.delta_vector_column_))) { + LOG_WARN("failed to append delta_vector column to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(vec_info.index_id_scn_column_))) { + LOG_WARN("failed to append token column to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(vec_info.index_id_vid_column_))) { + LOG_WARN("failed to append index_id_vid column to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(vec_info.index_id_type_column_))) { + LOG_WARN("failed to append index_id_type column to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(vec_info.index_id_vector_column_))) { + LOG_WARN("failed to append index_id_vector column to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(vec_info.snapshot_key_column_))) { + LOG_WARN("failed to append snapshot_key column to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(vec_info.snapshot_data_column_))) { + LOG_WARN("failed to append snapshot_data column to access exprs", K(ret)); + } else if (OB_FAIL(exprs.push_back(vec_info.target_vec_column_))) { + LOG_WARN("failed to append target vec column to access exprs", K(ret)); + } + } + + return ret; +} + +int ObLogTableScan::get_vec_idx_calc_exprs(ObIArray &all_exprs) +{ + int ret = OB_SUCCESS; + ObVectorIndexInfo &vec_info = get_vector_index_info(); + if (OB_ISNULL(vec_info.sort_key_.expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null vector sort expr", K(ret)); + } else if (OB_FAIL(all_exprs.push_back(vec_info.sort_key_.expr_))) { + LOG_WARN("failed to append vector sort expr", K(ret)); + } else if (OB_NOT_NULL(vec_info.topk_limit_expr_) && + OB_FAIL(all_exprs.push_back(vec_info.topk_limit_expr_))) { + LOG_WARN("failed to append limit expr", K(ret)); + } else if (OB_NOT_NULL(vec_info.topk_offset_expr_) && + 
OB_FAIL(all_exprs.push_back(vec_info.topk_offset_expr_))) { + LOG_WARN("failed to append offset expr", K(ret)); + } + return ret; +} + int ObLogTableScan::get_text_retrieval_calc_exprs(ObIArray &all_exprs) { int ret = OB_SUCCESS; @@ -2474,6 +2577,222 @@ int ObLogTableScan::print_text_retrieval_annotation(char *buf, int64_t buf_len, return ret; } +int ObLogTableScan::prepare_vector_access_exprs() +{ + int ret = OB_SUCCESS; + const ObTableSchema *table_schema = nullptr; + const ObTableSchema *delta_buf_table = nullptr; + const ObTableSchema *index_id_table = nullptr; + const ObTableSchema *snapshot_table = nullptr; + ObVectorIndexInfo &vc_info = get_vector_index_info(); + ObSqlSchemaGuard *schema_guard = nullptr; + TableItem *table_item = nullptr; + ObRawExprFactory *expr_factory = nullptr; + ObSQLSessionInfo *session_info = nullptr; + ObColumnRefRawExpr *vec_vid_column = nullptr; + ObColumnRefRawExpr *target_vec_column = nullptr; + ObColumnRefRawExpr *delta_vid_column = nullptr; + ObColumnRefRawExpr *delta_type_column = nullptr; + ObColumnRefRawExpr *delta_vector_column = nullptr; + ObColumnRefRawExpr *index_id_vid_column = nullptr; + ObColumnRefRawExpr *index_id_scn_column = nullptr; + ObColumnRefRawExpr *index_id_type_column = nullptr; + ObColumnRefRawExpr *index_id_vector_column = nullptr; + ObColumnRefRawExpr *snapshot_key_column = nullptr; + ObColumnRefRawExpr *snapshot_data_column = nullptr; + ObSEArray col_ids; + if (OB_NOT_NULL(vc_info.vec_id_column_) + && OB_NOT_NULL(vc_info.target_vec_column_) + && OB_NOT_NULL(vc_info.delta_vid_column_) + && OB_NOT_NULL(vc_info.delta_type_column_) + && OB_NOT_NULL(vc_info.delta_vector_column_) + && OB_NOT_NULL(vc_info.index_id_vid_column_) + && OB_NOT_NULL(vc_info.index_id_scn_column_) + && OB_NOT_NULL(vc_info.index_id_type_column_) + && OB_NOT_NULL(vc_info.index_id_vector_column_) + && OB_NOT_NULL(vc_info.snapshot_key_column_) + && OB_NOT_NULL(vc_info.snapshot_data_column_)) { + // do nothing, exprs already generated + } 
else if (OB_ISNULL(get_stmt()) || OB_ISNULL(get_plan()) || + OB_ISNULL(expr_factory = &get_plan()->get_optimizer_context().get_expr_factory()) || + OB_ISNULL(session_info = get_plan()->get_optimizer_context().get_session_info()) || + OB_ISNULL(schema_guard = get_plan()->get_optimizer_context().get_sql_schema_guard())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointer", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(get_real_ref_table_id(), table_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(vc_info.delta_buffer_tid_, delta_buf_table))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(vc_info.index_id_tid_, index_id_table))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(vc_info.index_snapshot_data_tid_, snapshot_table))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_ISNULL(table_item = get_stmt()->get_table_item_by_id(get_table_id()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointer", K(ret)); + } else if (OB_ISNULL(table_schema) || OB_ISNULL(delta_buf_table) || + OB_ISNULL(index_id_table) || OB_ISNULL(snapshot_table)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointer", K(ret)); + } else { + const ObColumnSchemaV2 *vec_column_schema = nullptr; + if (OB_FAIL(ObVectorIndexUtil::get_vector_index_column_id(*table_schema, *delta_buf_table, col_ids))) { // todo 考虑下要不要改这个函数 + LOG_WARN("failed to get vector index column.", K(ret)); + } else if (col_ids.count() != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid vector col counts.", K(ret), K(col_ids.count())); + } else if (OB_ISNULL(vec_column_schema = table_schema->get_column_schema(col_ids.at(0)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid vector col column.", K(ret), K(col_ids.at(0))); + } else if 
(OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *vec_column_schema, target_vec_column))) { + LOG_WARN("failed to build target vector column expr", K(ret)); + } else if (OB_NOT_NULL(target_vec_column)) { + target_vec_column->set_ref_id(get_table_id(), vec_column_schema->get_column_id()); + target_vec_column->set_column_attr(get_table_name(), vec_column_schema->get_column_name_str()); + target_vec_column->set_database_name(table_item->database_name_); + } + + for (int64_t i = 0; OB_SUCC(ret) && i < table_schema->get_column_count() && OB_ISNULL(vec_vid_column); ++i) { + const ObColumnSchemaV2 *col_schema = table_schema->get_column_schema_by_idx(i); + if (OB_ISNULL(col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else if (col_schema->is_vec_vid_column()) { + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *col_schema, vec_vid_column))) { + LOG_WARN("failed to build vec vid column expr", K(ret)); + } + } + } + for (int64_t i = 0; OB_SUCC(ret) && i < delta_buf_table->get_column_count(); ++i) { + const ObColumnSchemaV2 *data_col_schema = nullptr; + const ObColumnSchemaV2 *col_schema = delta_buf_table->get_column_schema_by_idx(i); + if (OB_ISNULL(col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else if (OB_ISNULL(data_col_schema = table_schema->get_column_schema(col_schema->get_column_id()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else if (data_col_schema->is_vec_vid_column()) { + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *data_col_schema, delta_vid_column))) { + LOG_WARN("failed to build vec vid column expr", K(ret)); + } + } else if (data_col_schema->is_vec_type_column()) { + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *data_col_schema, delta_type_column))) { + LOG_WARN("failed to build vec type column expr", K(ret)); + } else if 
(OB_NOT_NULL(delta_type_column)) { + delta_type_column->set_ref_id(get_table_id(), data_col_schema->get_column_id()); + delta_type_column->set_column_attr(get_table_name(), data_col_schema->get_column_name_str()); + delta_type_column->set_database_name(table_item->database_name_); + } + } else if (data_col_schema->is_vec_vector_column()) { + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *data_col_schema, delta_vector_column))) { + LOG_WARN("failed to build vec type column expr", K(ret)); + } else if (OB_NOT_NULL(delta_vector_column)) { + delta_vector_column->set_ref_id(get_table_id(), data_col_schema->get_column_id()); + delta_vector_column->set_column_attr(get_table_name(), data_col_schema->get_column_name_str()); + delta_vector_column->set_database_name(table_item->database_name_); + } + } + } + + for (int64_t i = 0; OB_SUCC(ret) && i < index_id_table->get_column_count(); ++i) { + const ObColumnSchemaV2 *data_col_schema = nullptr; + const ObColumnSchemaV2 *col_schema = index_id_table->get_column_schema_by_idx(i); + if (OB_ISNULL(col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else if (OB_ISNULL(data_col_schema = table_schema->get_column_schema(col_schema->get_column_id()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else if (data_col_schema->is_vec_vid_column()) { + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *data_col_schema, index_id_vid_column))) { + LOG_WARN("failed to build vec vid column expr", K(ret)); + } + } else if (data_col_schema->is_vec_type_column()) { + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *data_col_schema, index_id_type_column))) { + LOG_WARN("failed to build vec type column expr", K(ret)); + } else if (OB_NOT_NULL(index_id_type_column)) { + index_id_type_column->set_ref_id(get_table_id(), data_col_schema->get_column_id()); + index_id_type_column->set_column_attr(get_table_name(), 
data_col_schema->get_column_name_str()); + index_id_type_column->set_database_name(table_item->database_name_); + } + } else if (data_col_schema->is_vec_vector_column()) { + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *data_col_schema, index_id_vector_column))) { + LOG_WARN("failed to build vec type column expr", K(ret)); + } else if (OB_NOT_NULL(index_id_vector_column)) { + index_id_vector_column->set_ref_id(get_table_id(), data_col_schema->get_column_id()); + index_id_vector_column->set_column_attr(get_table_name(), data_col_schema->get_column_name_str()); + index_id_vector_column->set_database_name(table_item->database_name_); + } + } else if (data_col_schema->is_vec_scn_column()) { + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *data_col_schema, index_id_scn_column))) { + LOG_WARN("failed to build vec type column expr", K(ret)); + } else if (OB_NOT_NULL(index_id_scn_column)) { + index_id_scn_column->set_ref_id(get_table_id(), data_col_schema->get_column_id()); + index_id_scn_column->set_column_attr(get_table_name(), data_col_schema->get_column_name_str()); + index_id_scn_column->set_database_name(table_item->database_name_); + } + } + } + + for (int64_t i = 0; OB_SUCC(ret) && i < snapshot_table->get_column_count(); ++i) { + const ObColumnSchemaV2 *data_col_schema = nullptr; + const ObColumnSchemaV2 *col_schema = snapshot_table->get_column_schema_by_idx(i); + if (OB_ISNULL(col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else if (OB_ISNULL(data_col_schema = table_schema->get_column_schema(col_schema->get_column_id()))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else if (data_col_schema->is_vec_key_column()) { + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *data_col_schema, snapshot_key_column))) { + LOG_WARN("failed to build vec vid column expr", K(ret)); + } else if (OB_NOT_NULL(snapshot_key_column)) { + 
snapshot_key_column->set_ref_id(get_table_id(), data_col_schema->get_column_id()); + snapshot_key_column->set_column_attr(get_table_name(), data_col_schema->get_column_name_str()); + snapshot_key_column->set_database_name(table_item->database_name_); + } + } else if (data_col_schema->is_vec_data_column()) { + if (OB_FAIL(ObRawExprUtils::build_column_expr(*expr_factory, *data_col_schema, snapshot_data_column))) { + LOG_WARN("failed to build vec type column expr", K(ret)); + } else if (OB_NOT_NULL(snapshot_data_column)) { + snapshot_data_column->set_ref_id(get_table_id(), data_col_schema->get_column_id()); + snapshot_data_column->set_column_attr(get_table_name(), data_col_schema->get_column_name_str()); + snapshot_data_column->set_database_name(table_item->database_name_); + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(vec_vid_column) || OB_ISNULL(target_vec_column) + || OB_ISNULL(delta_vid_column) || OB_ISNULL(delta_type_column) + || OB_ISNULL(delta_vector_column) || OB_ISNULL(index_id_vid_column) || OB_ISNULL(index_id_type_column) + || OB_ISNULL(index_id_scn_column) || OB_ISNULL(index_id_vector_column) + || OB_ISNULL(snapshot_key_column) || OB_ISNULL(snapshot_data_column)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null vetor index generated column", K(ret), + KP(vec_vid_column), KP(delta_vid_column), + KP(delta_type_column), KP(delta_vector_column), + KP(index_id_vid_column), KP(index_id_type_column), + KP(index_id_scn_column), KP(index_id_vector_column), + KP(snapshot_key_column), KP(snapshot_data_column)); + } else { + vc_info.target_vec_column_ = target_vec_column; + vc_info.vec_id_column_ = vec_vid_column; + vc_info.delta_vid_column_ = delta_vid_column; + vc_info.delta_type_column_ = delta_type_column; + vc_info.delta_vector_column_ = delta_vector_column; + vc_info.index_id_scn_column_ = index_id_scn_column; + vc_info.index_id_vid_column_ = index_id_vid_column; + vc_info.index_id_type_column_ = index_id_type_column; + 
vc_info.index_id_vector_column_ = index_id_vector_column; + vc_info.snapshot_key_column_ = snapshot_key_column; + vc_info.snapshot_data_column_ = snapshot_data_column; + } + } + return ret; +} + int ObLogTableScan::prepare_text_retrieval_dep_exprs() { int ret = OB_SUCCESS; @@ -2850,3 +3169,125 @@ int ObLogTableScan::get_filter_assist_exprs(ObIArray &assist_exprs) } return ret; } + +int ObLogTableScan::check_das_need_scan_with_vid() +{ + int ret = OB_SUCCESS; + const ObLogPlan *plan = nullptr; + const ObDMLStmt *stmt = nullptr; + ObSqlSchemaGuard *schema_guard = nullptr; + const ObTableSchema *table_schema = nullptr; + is_tsc_with_vid_ = false; + if (OB_ISNULL(plan = get_plan()) || OB_ISNULL(stmt = get_stmt())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect error, plan or stmt is nullptr", K(ret), KP(plan), KP(stmt)); + } else if (!(stmt->is_delete_stmt() || stmt->is_update_stmt() || stmt->is_select_stmt())) { + // just skip, nothing to do + } else if (get_contains_fake_cte() || is_virtual_table(get_ref_table_id())) { + // just skip, nothing to do; + } else if (OB_ISNULL(schema_guard = plan->get_optimizer_context().get_sql_schema_guard())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, schema guard or get_plan() is nullptr", K(ret), KP(plan), KP(schema_guard)); + } else if (OB_FAIL(schema_guard->get_table_schema(table_id_, ref_table_id_, get_stmt(), table_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, table schema is nullptr", K(ret), K(get_real_ref_table_id()), K(table_id_), K(ref_table_id_)); + } else if (table_schema->is_vec_index()) { + // just skip, nothing to do. 
+ LOG_TRACE("skip vector index", K(ret), KPC(table_schema)); + } else if (plan->get_optimizer_context().is_online_ddl() && + plan->get_optimizer_context().get_root_stmt()->is_insert_stmt() && + plan->get_optimizer_context().get_root_stmt()->get_table_items().count() > 0) { + const TableItem *insert_table_item = plan->get_optimizer_context().get_root_stmt()->get_table_item(0); + if (OB_NOT_NULL(insert_table_item)) { + const uint64_t ddl_table_id = insert_table_item->ddl_table_id_; + const schema::ObTableSchema *ddl_table_schema = nullptr; + if (OB_FAIL(schema_guard->get_table_schema(ddl_table_id, ddl_table_schema))) { + LOG_WARN("fail to get ddl table schema", K(ret), K(ddl_table_id)); + } else if (OB_ISNULL(ddl_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get ddl table schema nullptr", K(ret), K(ddl_table_id)); + } else if (ddl_table_schema->is_vec_vid_rowkey_type() || + ddl_table_schema->is_vec_delta_buffer_type() || + ddl_table_schema->is_vec_index_id_type() || + ddl_table_schema->is_vec_index_snapshot_data_type()) { + is_tsc_with_vid_ = true; + } + } + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < stmt->get_column_size(); i++) { + const ColumnItem *col_item = stmt->get_column_item(i); + if (OB_ISNULL(col_item) || OB_ISNULL(col_item->expr_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(col_item), K(ret)); + } else if (col_item->table_id_ != table_id_ || !col_item->expr_->is_explicited_reference()) { + // do nothing + } else if (!col_item->expr_->is_vec_vid_column()) { + // do nothing. + } else { + is_tsc_with_vid_ = true; + break; // Only need to prepare rowkey doc once. 
+ } + } + } + if (OB_SUCC(ret) && is_tsc_with_vid_) { + if (OB_FAIL(table_schema->get_rowkey_vid_tid(rowkey_vid_tid_))) { + LOG_WARN("fail to get rowkey vid table id", K(ret), KPC(table_schema)); + } + } + LOG_TRACE("check_table_scan_with_vid", K(ret), K(is_tsc_with_vid_), K(rowkey_vid_tid_), KPC(table_schema)); + return ret; +} + +int ObLogTableScan::prepare_rowkey_vid_dep_exprs() +{ + int ret = OB_SUCCESS; + ObSqlSchemaGuard *schema_guard = nullptr; + const ObTableSchema *table_schema = nullptr; + const ObTableSchema *rowkey_vid_schema = nullptr; + ObArray rowkey_cids; + if (OB_ISNULL(get_plan()) || OB_ISNULL(schema_guard = get_plan()->get_optimizer_context().get_sql_schema_guard())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, schema guard or get_plan() is nullptr", K(ret), KP(get_plan()), KP(schema_guard)); + } else if (OB_FAIL(schema_guard->get_table_schema(get_real_ref_table_id(), table_schema))) { + LOG_WARN("failed to get table schema", K(ret)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, table schema is nullptr", K(ret)); + } else if (OB_FAIL(schema_guard->get_table_schema(rowkey_vid_tid_, rowkey_vid_schema))) { + LOG_WARN("fail toprint_ranges get rowkey vid table schema", K(ret), K(rowkey_vid_tid_)); + } else if (OB_ISNULL(rowkey_vid_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, rowkey vid schema is nullptr", K(ret), KPC(rowkey_vid_schema)); + } else if (OB_FAIL(rowkey_vid_schema->get_rowkey_column_ids(rowkey_cids))) { + LOG_WARN("fail to get rowkey column ids in rowkey vid", K(ret), KPC(rowkey_vid_schema)); + } else { + const ObColumnSchemaV2 *col_schema = nullptr; + ObColumnRefRawExpr *column_expr = nullptr; + uint64_t vec_vid_col_id = OB_INVALID_ID; + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_cids.count(); ++i) { + if (OB_ISNULL(col_schema = rowkey_vid_schema->get_column_schema(rowkey_cids.at(i)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null 
column schema ptr", K(ret)); + } else if (OB_FAIL(ObRawExprUtils::build_column_expr(get_plan()->get_optimizer_context().get_expr_factory(), + *col_schema, column_expr))) { + LOG_WARN("failed to build rowkey doc id column expr", K(ret), K(i), KPC(col_schema)); + } else if (OB_FAIL(rowkey_vid_exprs_.push_back(column_expr))) { + LOG_WARN("fail to push back column expr", K(ret)); + } + } + if (FAILEDx(rowkey_vid_schema->get_vec_index_vid_col_id(vec_vid_col_id))) { + LOG_WARN("fail to get vec index column ids", K(ret), KPC(rowkey_vid_schema)); + } else if (OB_ISNULL(col_schema = rowkey_vid_schema->get_column_schema(vec_vid_col_id))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null column schema ptr", K(ret)); + } else if (OB_FAIL(ObRawExprUtils::build_column_expr(get_plan()->get_optimizer_context().get_expr_factory(), + *col_schema, column_expr))) { + LOG_WARN("failed to build rowkey vec vid column expr", K(ret), K(vec_vid_col_id), KPC(col_schema)); + } else if (OB_FAIL(rowkey_vid_exprs_.push_back(column_expr))) { + LOG_WARN("fail to push back column expr", K(ret)); + } + } + return ret; +} diff --git a/src/sql/optimizer/ob_log_table_scan.h b/src/sql/optimizer/ob_log_table_scan.h index 77f8aba7be..48fe36eba6 100644 --- a/src/sql/optimizer/ob_log_table_scan.h +++ b/src/sql/optimizer/ob_log_table_scan.h @@ -95,6 +95,65 @@ struct ObRawFilterMonotonicity K_(mono), K_(assist_exprs)); }; +struct ObVectorIndexInfo +{ + ObVectorIndexInfo() + : sort_key_(), + topk_limit_expr_(nullptr), + topk_offset_expr_(nullptr), + target_vec_column_(nullptr), + vec_id_column_(nullptr), + delta_vid_column_(), + delta_type_column_(), + delta_vector_column_(), + index_id_scn_column_(), + index_id_vid_column_(), + index_id_type_column_(), + index_id_vector_column_(), + snapshot_key_column_(), + snapshot_data_column_(), + delta_buffer_tid_(OB_INVALID_ID), + index_id_tid_(OB_INVALID_ID), + index_snapshot_data_tid_(OB_INVALID_ID), + main_table_tid_(OB_INVALID_ID) + { } + ~ObVectorIndexInfo() 
{} + + TO_STRING_KV(K_(sort_key), KPC_(topk_limit_expr), KPC_(topk_offset_expr), + KPC_(vec_id_column), KPC_(delta_vid_column), KPC_(delta_type_column), + KPC_(delta_vector_column), KPC_(index_id_scn_column), KPC_(index_id_vid_column), + KPC_(index_id_type_column), KPC_(index_id_vector_column), + KPC_(snapshot_key_column), KPC_(snapshot_data_column), + K_(delta_buffer_tid), K_(index_id_tid), K_(index_snapshot_data_tid), + K_(main_table_tid)); + bool need_sort() const { return sort_key_.expr_ != nullptr; } + + // topn infos + OrderItem sort_key_; + ObRawExpr *topk_limit_expr_; + ObRawExpr *topk_offset_expr_; + // table col access expr + ObColumnRefRawExpr *target_vec_column_; + + ObColumnRefRawExpr *vec_id_column_; + + // column of delta_buffer_table + ObColumnRefRawExpr *delta_vid_column_; + ObColumnRefRawExpr *delta_type_column_; + ObColumnRefRawExpr *delta_vector_column_; + // column of index_id_table + ObColumnRefRawExpr *index_id_scn_column_; + ObColumnRefRawExpr *index_id_vid_column_; + ObColumnRefRawExpr *index_id_type_column_; + ObColumnRefRawExpr *index_id_vector_column_; + // column of index_snapshot_data_table + ObColumnRefRawExpr *snapshot_key_column_; + ObColumnRefRawExpr *snapshot_data_column_; + uint64_t delta_buffer_tid_; + uint64_t index_id_tid_; + uint64_t index_snapshot_data_tid_; + uint64_t main_table_tid_; +}; class ObLogTableScan : public ObLogicalOperator { @@ -155,8 +214,11 @@ public: use_column_store_(false), doc_id_table_id_(common::OB_INVALID_ID), text_retrieval_info_(), + vector_index_info_(), das_keep_ordering_(false), - filter_monotonicity_() + filter_monotonicity_(), + is_tsc_with_vid_(false), + rowkey_vid_tid_(common::OB_INVALID_ID) { } @@ -567,15 +629,19 @@ public: int adjust_print_access_info(ObIArray &access_exprs); static int replace_gen_column(ObLogPlan *plan, ObRawExpr *part_expr, ObRawExpr *&new_part_expr); int extract_file_column_exprs_recursively(ObRawExpr *expr); + inline bool is_tsc_with_vid() const { return 
is_tsc_with_vid_; } inline bool is_text_retrieval_scan() const { return is_index_scan() && NULL != text_retrieval_info_.match_expr_; } inline bool is_multivalue_index_scan() const { return is_multivalue_index_; } inline ObTextRetrievalInfo &get_text_retrieval_info() { return text_retrieval_info_; } inline const ObTextRetrievalInfo &get_text_retrieval_info() const { return text_retrieval_info_; } int prepare_text_retrieval_dep_exprs(); + int prepare_vector_access_exprs(); inline bool need_text_retrieval_calc_relevance() const { return text_retrieval_info_.need_calc_relevance_; } - inline bool need_doc_id_index_back() const { return is_text_retrieval_scan() || is_multivalue_index_scan() ; } + inline bool need_doc_id_index_back() const { return is_text_retrieval_scan() || is_multivalue_index_scan() || is_vec_idx_scan(); } inline void set_doc_id_index_table_id(const uint64_t doc_id_index_table_id) { doc_id_table_id_ = doc_id_index_table_id; } inline uint64_t get_doc_id_index_table_id() const { return doc_id_table_id_; } + inline uint64_t get_rowkey_vid_table_id() const { return rowkey_vid_tid_; } + inline const common::ObIArray &get_rowkey_vid_exprs() const { return rowkey_vid_exprs_; } virtual int get_card_without_filter(double &card) override; inline ObRawExpr *get_identify_seq_expr() { return identify_seq_expr_; } inline int has_exec_param(bool &bool_ret) const @@ -583,10 +649,14 @@ public: return est_cost_info_ == NULL ? 
common::OB_SUCCESS : est_cost_info_->has_exec_param(bool_ret); } void set_identify_seq_expr(ObRawExpr *expr) { identify_seq_expr_ = expr; } + inline bool is_vec_idx_scan() const { return is_index_scan() && vector_index_info_.delta_buffer_tid_ != OB_INVALID_ID; } + inline ObVectorIndexInfo &get_vector_index_info() { return vector_index_info_; } + inline const ObVectorIndexInfo &get_vector_index_info() const { return vector_index_info_; } inline bool das_need_keep_ordering() const { return das_keep_ordering_; } int check_das_need_keep_ordering(); + int check_das_need_scan_with_vid(); const ObIArray& get_filter_monotonicity() const { return filter_monotonicity_; } @@ -610,13 +680,16 @@ private: // member functions int get_mbr_column_exprs(const uint64_t table_id, ObIArray &mbr_exprs); int allocate_lookup_trans_info_expr(); int allocate_group_id_expr(); - int extract_doc_id_index_back_expr(ObIArray &exprs); + int extract_doc_id_index_back_expr(ObIArray &exprs, bool is_vec_scan = false); int extract_text_retrieval_access_expr(ObIArray &exprs); + int extract_vec_idx_access_expr(ObIArray &exprs); int get_text_retrieval_calc_exprs(ObIArray &all_exprs); + int get_vec_idx_calc_exprs(ObIArray &all_exprs); int print_text_retrieval_annotation(char *buf, int64_t buf_len, int64_t &pos, ExplainType type); int find_nearest_rcte_op(ObLogSet *&rcte_op); int generate_filter_monotonicity(); int get_filter_assist_exprs(ObIArray &assist_exprs); + int prepare_rowkey_vid_dep_exprs(); protected: // memeber variables // basic info uint64_t table_id_; //table id or alias table id @@ -728,13 +801,21 @@ protected: // memeber variables share::schema::ObTableType table_type_; bool use_column_store_; - uint64_t doc_id_table_id_; // used for rowkey lookup of fulltext and JSON multi-value index + uint64_t doc_id_table_id_; // used for rowkey lookup of fulltext, JSON multi-value and vector index ObTextRetrievalInfo text_retrieval_info_; + ObVectorIndexInfo vector_index_info_; ObPxRFStaticInfo 
px_rf_info_; bool das_keep_ordering_; typedef common::ObSEArray FilterMonotonicity; FilterMonotonicity filter_monotonicity_; + + // begin for table scan with vid + bool is_tsc_with_vid_; + uint64_t rowkey_vid_tid_; + common::ObSEArray rowkey_vid_exprs_; + // end for table scan with vid + // disallow copy and assign DISALLOW_COPY_AND_ASSIGN(ObLogTableScan); }; diff --git a/src/sql/optimizer/ob_logical_operator.cpp b/src/sql/optimizer/ob_logical_operator.cpp index a3950b6479..609c257691 100644 --- a/src/sql/optimizer/ob_logical_operator.cpp +++ b/src/sql/optimizer/ob_logical_operator.cpp @@ -4343,7 +4343,7 @@ int ObLogicalOperator::allocate_granule_nodes_above(AllocGIContext &ctx) gi_op->add_flag(GI_PARTITION_WISE); } if (LOG_TABLE_SCAN == get_type()) { - if (static_cast(this)->is_text_retrieval_scan()) { + if (static_cast(this)->is_text_retrieval_scan() || static_cast(this)->is_vec_idx_scan()) { gi_op->add_flag(GI_FORCE_PARTITION_GRANULE); } if (static_cast(this)->get_join_filter_info().is_inited_) { diff --git a/src/sql/optimizer/ob_opt_est_cost_model.h b/src/sql/optimizer/ob_opt_est_cost_model.h index 182a1a2be8..457544c382 100644 --- a/src/sql/optimizer/ob_opt_est_cost_model.h +++ b/src/sql/optimizer/ob_opt_est_cost_model.h @@ -110,12 +110,14 @@ struct ObIndexMetaInfo is_geo_index_(false), is_fulltext_index_(false), is_multivalue_index_(false), + is_vector_index_(false), index_micro_block_count_(-1) { } virtual ~ObIndexMetaInfo() { } void assign(const ObIndexMetaInfo &index_meta_info); double get_micro_block_numbers() const; + inline bool is_domain_index() const { return is_geo_index_ || is_fulltext_index_ || is_multivalue_index_ || is_vector_index_;} TO_STRING_KV(K_(ref_table_id), K_(index_id), K_(index_micro_block_size), K_(index_part_count), K_(index_part_size), K_(index_column_count), K_(is_index_back), @@ -132,6 +134,7 @@ struct ObIndexMetaInfo bool is_geo_index_; // whether is spatial index bool is_fulltext_index_; // is fulltext index bool 
is_multivalue_index_; // is multivalue index + bool is_vector_index_; // is vector index int64_t index_micro_block_count_; // micro block count from table static info private: DISALLOW_COPY_AND_ASSIGN(ObIndexMetaInfo); diff --git a/src/sql/parser/non_reserved_keywords_mysql_mode.c b/src/sql/parser/non_reserved_keywords_mysql_mode.c index 3bc066704b..dc3d225f23 100644 --- a/src/sql/parser/non_reserved_keywords_mysql_mode.c +++ b/src/sql/parser/non_reserved_keywords_mysql_mode.c @@ -47,6 +47,8 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"analyze", ANALYZE}, {"and", AND}, {"any", ANY}, + {"approx", APPROX}, + {"approximate", APPROXIMATE}, {"approx_count_distinct", APPROX_COUNT_DISTINCT}, {"approx_count_distinct_synopsis", APPROX_COUNT_DISTINCT_SYNOPSIS}, {"approx_count_distinct_synopsis_merge", APPROX_COUNT_DISTINCT_SYNOPSIS_MERGE}, @@ -70,6 +72,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"availability", AVAILABILITY}, {"avg", AVG}, {"avg_row_length", AVG_ROW_LENGTH}, + {"array", ARRAY}, {"backup", BACKUP}, {"backupset", BACKUPSET}, {"balance", BALANCE}, @@ -167,6 +170,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"connect", CONNECT}, {"convert", CONVERT}, {"copy", COPY}, + {"cosine", COSINE}, {"count", COUNT}, {"cpu", CPU}, {"create", CREATE}, @@ -272,6 +276,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"escape", ESCAPE}, {"escaped", ESCAPED}, {"estimate", ESTIMATE}, + {"euclidean", EUCLIDEAN}, {"event", EVENT}, {"events", EVENTS}, {"every", EVERY}, @@ -473,6 +478,7 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"low_priority", LOW_PRIORITY}, {"ls", LS}, {"major", MAJOR}, + {"manhattan", MANHATTAN}, {"manual", MANUAL}, {"master", MASTER}, {"master_bind", MASTER_BIND}, @@ -1003,6 +1009,8 @@ static const NonReservedKeyword Mysql_none_reserved_keywords[] = {"validate", VALIDATE}, {"values", VALUES}, {"varying", VARYING}, + {"vector", VECTOR}, + 
{"vector_distance", VECTOR_DISTANCE}, {"view", VIEW}, {"virtual", VIRTUAL}, {"virtual_column_id", VIRTUAL_COLUMN_ID}, diff --git a/src/sql/parser/ob_char_type.h b/src/sql/parser/ob_char_type.h index 1dbdccc749..7e46768637 100644 --- a/src/sql/parser/ob_char_type.h +++ b/src/sql/parser/ob_char_type.h @@ -215,14 +215,14 @@ namespace sql 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; - // [+&~|^/%*(),.!=<>{}] + // [+&~|^/%*(),.!=<>{}\[\]] static const bool MYSQL_NORMAL_CHAR_FLAGS[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,1,0,0,0,1,1,0,1,1,1,1,1,0,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, diff --git a/src/sql/parser/parse_node.h b/src/sql/parser/parse_node.h index 3c3200cc6e..6413799512 100644 --- a/src/sql/parser/parse_node.h +++ b/src/sql/parser/parse_node.h @@ -53,6 +53,7 @@ enum SelectParserOffset PARSE_SELECT_FORMER, PARSE_SELECT_LATER, PARSE_SELECT_ORDER, + PARSE_SELECT_APPROX, PARSE_SELECT_LIMIT, PARSE_SELECT_FOR_UPD, PARSE_SELECT_HINTS, @@ -433,6 +434,7 @@ extern bool nodename_is_sdo_geometry_type(const ParseNode *node); #define OB_NODE_CAST_NUMBER_TYPE_IDX 1 #define OB_NODE_CAST_C_LEN_IDX 1 #define OB_NODE_CAST_GEO_TYPE_IDX 1 +#define OB_NODE_CAST_CS_LEVEL_IDX 2 typedef enum ObNumberParseType { diff --git a/src/sql/parser/sql_parser_base.h b/src/sql/parser/sql_parser_base.h index 6cdfd9212a..011ed5b6d0 100644 --- a/src/sql/parser/sql_parser_base.h +++ b/src/sql/parser/sql_parser_base.h @@ -1168,7 +1168,7 @@ do {\ } \ } while(0); \ -#define malloc_select_values_stmt(node, result, values_node, order_by_node, limit_node)\ +#define malloc_select_values_stmt(node, result, values_node, order_by_node, approx_node, limit_node)\ do {\ /*gen select list*/\ ParseNode *star_node = NULL;\ @@ -1188,6 +1188,7 @@ do {\ node->children_[PARSE_SELECT_SELECT] = 
project_list_node;\ node->children_[PARSE_SELECT_FROM] = from_list;\ node->children_[PARSE_SELECT_ORDER] = order_by_node;\ + node->children_[PARSE_SELECT_APPROX] = approx_node;\ node->children_[PARSE_SELECT_LIMIT] = limit_node;\ } while(0);\ diff --git a/src/sql/parser/sql_parser_mysql_mode.l b/src/sql/parser/sql_parser_mysql_mode.l index 1cd855dd64..835e601ca3 100644 --- a/src/sql/parser/sql_parser_mysql_mode.l +++ b/src/sql/parser/sql_parser_mysql_mode.l @@ -1463,7 +1463,7 @@ BEGIN(in_c_comment); ((ParseResult *)yyextra)->has_encount_comment_ = true; /* ignore */ } -[-+&~|^/%*(),.:!{}] { +[-+&~|^/%*(),.:!{}\[\]] { if (IS_FAST_PARAMETERIZE) { ParseResult *p = (ParseResult *)yyextra; // for 'select - -1 from dual' diff --git a/src/sql/parser/sql_parser_mysql_mode.y b/src/sql/parser/sql_parser_mysql_mode.y index 49d2ed7a93..3130438237 100644 --- a/src/sql/parser/sql_parser_mysql_mode.y +++ b/src/sql/parser/sql_parser_mysql_mode.y @@ -127,6 +127,7 @@ extern void obsql_oracle_parse_fatal_error(int32_t errcode, yyscan_t yyscanner, %left '+' '-' %left '*' '/' '%' MOD DIV POW %left '^' +%left VECTOR_DISTANCE %nonassoc LOWER_THAN_NEG SAMPLE/* for simple_expr conflict*/ %left CNNOP %left NEG '~' @@ -264,7 +265,7 @@ END_P SET_VAR DELIMITER ACCESS ACCESSID ACCESSKEY ACCESSTYPE ACCOUNT ACTION ACTIVE ADDDATE AFTER AGAINST AGGREGATE ALGORITHM ALL_META ALL_USER ALWAYS ALLOW ANALYSE ANY APPROX_COUNT_DISTINCT APPROX_COUNT_DISTINCT_SYNOPSIS APPROX_COUNT_DISTINCT_SYNOPSIS_MERGE ARBITRATION ARRAY ASCII ASIS AT AUTHORS AUTO AUTOEXTEND_SIZE AUTO_INCREMENT AUTO_INCREMENT_MODE AUTO_INCREMENT_CACHE_SIZE - AVG AVG_ROW_LENGTH ACTIVATE AVAILABILITY ARCHIVELOG ASYNCHRONOUS AUDIT ADMIN AUTO_REFRESH + AVG AVG_ROW_LENGTH ACTIVATE AVAILABILITY ARCHIVELOG ASYNCHRONOUS AUDIT ADMIN AUTO_REFRESH APPROX APPROXIMATE BACKUP BACKUP_COPIES BALANCE BANDWIDTH BASE BASELINE BASELINE_ID BASIC BEGI BINDING SHARDING BINLOG BIT BIT_AND BIT_OR BIT_XOR BLOCK BLOCK_INDEX BLOCK_SIZE BLOOM_FILTER BOOL BOOLEAN 
BOOTSTRAP BTREE BYTE @@ -275,7 +276,7 @@ END_P SET_VAR DELIMITER CLASS_ORIGIN CLEAN CLEAR CLIENT CLONE CLOG CLOSE CLUSTER CLUSTER_ID CLUSTER_NAME COALESCE COLUMN_STAT CODE COLLATION COLUMN_FORMAT COLUMN_NAME COLUMNS COMMENT COMMIT COMMITTED COMPACT COMPLETION COMPLETE COMPRESSED COMPRESSION COMPRESSION_CODE COMPUTATION COMPUTE CONCURRENT CONDENSED CONDITIONAL CONNECTION CONSISTENT CONSISTENT_MODE CONSTRAINT_CATALOG - CONSTRAINT_NAME CONSTRAINT_SCHEMA CONTAINS CONTEXT CONTRIBUTORS COPY COUNT CPU CREATE_TIMESTAMP + CONSTRAINT_NAME CONSTRAINT_SCHEMA CONTAINS CONTEXT CONTRIBUTORS COPY COSINE COUNT CPU CREATE_TIMESTAMP CTXCAT CTX_ID CUBE CURDATE CURRENT STACKED CURTIME CURSOR_NAME CUME_DIST CYCLE CALC_PARTITION_ID CONNECT DAG DATA DATAFILE DATA_TABLE_ID DATA_SOURCE DATE DATE_ADD DATE_SUB DATETIME DAY DEALLOCATE DECRYPTION @@ -285,7 +286,7 @@ END_P SET_VAR DELIMITER EFFECTIVE EMPTY ENABLE ENABLE_ARBITRATION_SERVICE ENABLE_EXTENDED_ROWID ENCRYPTED ENCRYPTION END ENDPOINT ENDS ENFORCED ENGINE_ ENGINES ENUM ENTITY ERROR_CODE ERROR_P ERRORS ESTIMATE ESCAPE EVENT EVENTS EVERY EXCHANGE EXCLUDING EXECUTE EXPANSION EXPIRE EXPIRE_INFO EXPORT OUTLINE EXTENDED - EXTENDED_NOADDR EXTENT_SIZE EXTRACT EXCEPT EXPIRED ENCODING EMPTY_FIELD_AS_NULL EXTERNAL + EXTENDED_NOADDR EXTENT_SIZE EXTRACT EXCEPT EXPIRED ENCODING EMPTY_FIELD_AS_NULL EUCLIDEAN EXTERNAL FAILOVER FAST FAULTS FILE_BLOCK_SIZE FIELDS FILEX FINAL_COUNT FIRST FIRST_VALUE FIXED FLUSH FOLLOWER FORMAT FOUND FREEZE FREQUENCY FUNCTION FOLLOWING FLASHBACK FULL FRAGMENTATION FROZEN FILE_ID @@ -310,7 +311,7 @@ END_P SET_VAR DELIMITER LEVEL LN LOG LS LINK LOG_RESTORE_SOURCE LINE_DELIMITER - MAJOR MANUAL MASTER MASTER_AUTO_POSITION MASTER_CONNECT_RETRY MASTER_DELAY MASTER_HEARTBEAT_PERIOD + MAJOR MANHATTAN MANUAL MASTER MASTER_AUTO_POSITION MASTER_CONNECT_RETRY MASTER_DELAY MASTER_HEARTBEAT_PERIOD MASTER_HOST MASTER_LOG_FILE MASTER_LOG_POS MASTER_PASSWORD MASTER_PORT MASTER_RETRY_COUNT MASTER_SERVER_ID MASTER_SSL MASTER_SSL_CA 
MASTER_SSL_CAPATH MASTER_SSL_CERT MASTER_SSL_CIPHER MASTER_SSL_CRL MASTER_SSL_CRLPATH MASTER_SSL_KEY MASTER_USER MAX MAX_CONNECTIONS_PER_HOUR MAX_CPU @@ -366,7 +367,7 @@ END_P SET_VAR DELIMITER UNUSUAL UPGRADE USE_BLOOM_FILTER UNKNOWN USE_FRM USER USER_RESOURCES UNBOUNDED UP UNLIMITED USER_SPECIFIED VALID VALUE VARIANCE VARIABLES VERBOSE VERIFY VIEW VISIBLE VIRTUAL_COLUMN_ID VALIDATE VAR_POP - VAR_SAMP VALIDATION + VAR_SAMP VALIDATION VECTOR VECTOR_DISTANCE WAIT WARNINGS WASH WEEK WEIGHT_STRING WHENEVER WORK WRAPPER WINDOW WEAK WITH_COLUMN_GROUP WITHOUT @@ -416,7 +417,7 @@ END_P SET_VAR DELIMITER %type replace_with_opt_hint insert_with_opt_hint column_list opt_on_duplicate_key_clause opt_into opt_replace opt_temporary opt_algorithm opt_sql_security opt_definer view_algorithm no_param_column_ref %type insert_vals_list insert_vals value_or_values opt_insert_row_alias %type select_with_parens select_no_parens select_clause select_into no_table_select_with_order_and_limit simple_select_with_order_and_limit select_with_parens_with_order_and_limit select_clause_set select_clause_set_left select_clause_set_right select_clause_set_with_order_and_limit -%type simple_select no_table_select limit_clause select_expr_list +%type simple_select no_table_select limit_clause select_expr_list opt_approx %type with_select with_clause with_list common_table_expr opt_column_alias_name_list alias_name_list column_alias_name %type opt_where opt_hint_value opt_groupby opt_rollup opt_order_by order_by opt_having groupby_clause %type opt_limit_clause limit_expr opt_lock_type opt_for_update opt_for_update_wait opt_lock_in_share_mode @@ -542,6 +543,7 @@ END_P SET_VAR DELIMITER %type external_table_partitions external_table_partition %type skip_index_type opt_skip_index_type_list %type opt_rebuild_column_store +%type vec_index_params vec_index_param vec_index_param_value %type json_table_expr mock_jt_on_error_on_empty jt_column_list json_table_column_def %type json_table_ordinality_column_def 
json_table_exists_column_def json_table_value_column_def json_table_nested_column_def %type opt_value_on_empty_or_error_or_mismatch opt_on_mismatch @@ -556,6 +558,8 @@ END_P SET_VAR DELIMITER %type service_name_stmt service_op %type ttl_definition ttl_expr ttl_unit %type id_dot_id id_dot_id_dot_id +%type vector_distance_expr vector_distance_metric +%type any_expr %type opt_empty_table_list opt_repair_mode opt_repair_option_list repair_option repair_option_list opt_checksum_option %type cache_index_stmt load_index_into_cache_stmt tbl_index_list tbl_index tbl_partition_list opt_tbl_partition_list tbl_index_or_partition_list tbl_index_or_partition opt_ignore_leaves key_cache_name %start sql_stmt @@ -1306,13 +1310,18 @@ bool_pri IS NULLX %prec IS check_ret(setup_token_pos_info_and_dup_string($$, result, @1.first_column, @3.last_column), &@1, result); } -| bool_pri COMP_EQ sub_query_flag select_with_parens %prec COMP_EQ +| bool_pri COMP_EQ sub_query_flag any_expr %prec COMP_EQ { - ParseNode *sub_query = NULL; - malloc_non_terminal_node(sub_query, result->malloc_pool_, $3->type_, 1, $4); - malloc_non_terminal_node($$, result->malloc_pool_, T_OP_EQ, 2, $1, sub_query); - check_ret(setup_token_pos_info_and_dup_string($$, result, @1.first_column, @4.last_column), - &@1, result); + if ($4->type_ == T_EXPR_LIST && $4->reserved_ == 1) { + /* rewrite any operation to array_contains expr*/ + malloc_non_terminal_node($$, result->malloc_pool_, T_FUNC_SYS_ARRAY_CONTAINS, 2, $1, $4); + } else { + ParseNode *sub_query = NULL; + malloc_non_terminal_node(sub_query, result->malloc_pool_, $3->type_, 1, $4); + malloc_non_terminal_node($$, result->malloc_pool_, T_OP_EQ, 2, $1, sub_query); + check_ret(setup_token_pos_info_and_dup_string($$, result, @1.first_column, @4.last_column), + &@1, result); + } } | bool_pri COMP_NSEQ predicate %prec COMP_NSEQ { @@ -1930,6 +1939,18 @@ ALL malloc_terminal_node($$, result->malloc_pool_, T_ANY); } +any_expr: +select_with_parens %prec NEG +{ + $$ = $1; +} 
+| '(' expr_list ')' +{ + $$ = $2; + $$->reserved_ = 1; /* means it's param of array_contains func_expr */ +} +; + in_expr: select_with_parens %prec NEG @@ -3236,6 +3257,18 @@ MOD '(' expr ',' expr ')' { malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SUM_OPNSIZE, 2, NULL, $3); } +| ARRAY '(' expr_list ')' +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS_ARRAY, 1, $3); +} +| '[' expr_list ']' +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS_ARRAY, 1, $2); +} +| vector_distance_expr +{ + $$ = $1; +} | RB_BUILD_AGG '(' expr ')' { malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS_RB_BUILD_AGG, 1, $3); @@ -3253,6 +3286,45 @@ MOD '(' expr ',' expr ')' } ; +vector_distance_expr: +VECTOR_DISTANCE '(' expr ',' expr ')' +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS_VECTOR_DISTANCE, 2, $3, $5); +} +| +VECTOR_DISTANCE '(' expr ',' expr ',' vector_distance_metric ')' +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS_VECTOR_DISTANCE, 3, $3, $5, $7); +} +; + +vector_distance_metric: +COSINE +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->is_hidden_const_ = 1; + $$->value_ = 0; +} +| DOT +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->is_hidden_const_ = 1; + $$->value_ = 1; +} +| EUCLIDEAN +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->is_hidden_const_ = 1; + $$->value_ = 2; +} +| MANHATTAN +{ + malloc_terminal_node($$, result->malloc_pool_, T_INT); + $$->is_hidden_const_ = 1; + $$->value_ = 3; +} +; + mvt_param: STRING_VALUE { $$ = $1; } | INTNUM { $$ = $1; } @@ -5402,6 +5474,16 @@ column_definition malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 5, $3, col_list, index_option, $4, NULL); $$->value_ = 3; } +| VECTOR key_or_index opt_index_name '(' sort_column_list ')' opt_index_option_list +{ + (void)($2); + ParseNode *col_list = NULL; + ParseNode *index_option = NULL; + merge_nodes(col_list, result, T_INDEX_COLUMN_LIST, $5); 
+ merge_nodes(index_option, result, T_TABLE_OPTION_LIST, $7); + malloc_non_terminal_node($$, result->malloc_pool_, T_INDEX, 5, $3, col_list, index_option, NULL, NULL); + $$->value_ = 6; +} | CONSTRAINT opt_constraint_name FOREIGN KEY opt_index_name '(' column_name_list ')' REFERENCES relation_factor '(' column_name_list ')' opt_match_option opt_reference_option_list { ParseNode *child_col_list= NULL; @@ -6405,6 +6487,21 @@ int_type_i opt_int_length_i opt_unsigned_i opt_zerofill_i $$->int32_values_[0] = 0; /* length */ $$->int32_values_[1] = 7; /* geometrycollection, geometry uses collation type value convey sub geometry type. */ } +| ARRAY '(' data_type ')' +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_COLLECTION, 1, $3); + $$->int32_values_[0] = 0; /* array type */ +} +| data_type '[' ']' +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_COLLECTION, 1, $1); + $$->int32_values_[0] = 0; /* array type */ +} +| VECTOR '(' INTNUM ')' +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_COLLECTION, 1, $3); + $$->int32_values_[0] = 1; /* vector type */ +} | ROARINGBITMAP { malloc_terminal_node($$, result->malloc_pool_, T_ROARINGBITMAP); @@ -9060,7 +9157,8 @@ ALTER {$$ = NULL;} ; opt_index_keyname: -FULLTEXT { $$[0] = 3; } +VECTOR { $$[0] = 6; } +| FULLTEXT { $$[0] = 3; } | SPATIAL { $$[0] = 2; } | UNIQUE { $$[0] = 1; } | /*EMPTY*/ { $$[0] = 0; } @@ -9259,6 +9357,11 @@ GLOBAL { malloc_terminal_node($$, result->malloc_pool_, T_WITH_ROWID); } +| WITH '(' vec_index_params ')' +{ + merge_nodes($$, result, T_VEC_INDEX_PARAMS, $3); + dup_expr_string($$, result, @3.first_column, @3.last_column); +} | WITH PARSER relation_name { malloc_non_terminal_node($$, result->malloc_pool_, T_PARSER_NAME, 1, $3); @@ -9719,8 +9822,8 @@ dml_table_name values_clause malloc_non_terminal_node(into_node, result->malloc_pool_, T_INSERT_INTO_CLAUSE, 2, $1, column_list_node); malloc_non_terminal_node(values_list_node, result->malloc_pool_, T_VALUES_ROW_LIST, 1,
value_vector_node); malloc_non_terminal_node(values_node, result->malloc_pool_, T_VALUES_TABLE_EXPRESSION, 1, values_list_node); - malloc_select_values_stmt(subquery_node, result, values_node, NULL, NULL); - malloc_select_values_stmt(select_node, result, subquery_node, NULL, NULL); + malloc_select_values_stmt(subquery_node, result, values_node, NULL, NULL, NULL); + malloc_select_values_stmt(select_node, result, subquery_node, NULL, NULL, NULL); select_node->children_[PARSE_SELECT_FROM]->children_[0]->children_[1] = $4; val_list = select_node; val_list->reserved_ = 1; @@ -9749,8 +9852,8 @@ value_or_values insert_vals_list opt_insert_row_alias malloc_non_terminal_node(values_list_node, result->malloc_pool_, T_VALUES_ROW_LIST, 1, $2); } malloc_non_terminal_node(values_node, result->malloc_pool_, T_VALUES_TABLE_EXPRESSION, 1, values_list_node); - malloc_select_values_stmt(subquery_node, result, values_node, NULL, NULL); - malloc_select_values_stmt($$, result, subquery_node, NULL, NULL); + malloc_select_values_stmt(subquery_node, result, values_node, NULL, NULL, NULL); + malloc_select_values_stmt($$, result, subquery_node, NULL, NULL, NULL); $$->children_[PARSE_SELECT_FROM]->children_[0]->children_[1] = $3; $$->reserved_ = 1; } else { @@ -10279,6 +10382,17 @@ no_table_select order_by } ; +opt_approx: +APPROX +{ + malloc_terminal_node($$, result->malloc_pool_, T_APPROX); +} +| APPROXIMATE +{ + malloc_terminal_node($$, result->malloc_pool_, T_APPROX); +} +; + simple_select_with_order_and_limit: simple_select order_by { @@ -10289,8 +10403,16 @@ simple_select order_by { $$ = $1; $$->children_[PARSE_SELECT_ORDER] = $2; + $$->children_[PARSE_SELECT_APPROX] = NULL; $$->children_[PARSE_SELECT_LIMIT] = $3; } +| simple_select order_by opt_approx limit_clause +{ + $$ = $1; + $$->children_[PARSE_SELECT_ORDER] = $2; + $$->children_[PARSE_SELECT_APPROX] = $3; + $$->children_[PARSE_SELECT_LIMIT] = $4; +} ; select_with_parens_with_order_and_limit: @@ -13689,7 +13811,7 @@ VALUES 
values_row_list ParseNode *value_list = NULL; merge_nodes(value_list, result, T_VALUES_ROW_LIST, $2); malloc_non_terminal_node(values_node, result->malloc_pool_, T_VALUES_TABLE_EXPRESSION, 1, value_list); - malloc_select_values_stmt($$, result, values_node, NULL, NULL); + malloc_select_values_stmt($$, result, values_node, NULL, NULL, NULL); } ; @@ -13700,7 +13822,7 @@ VALUES values_row_list order_by ParseNode *value_list = NULL; merge_nodes(value_list, result, T_VALUES_ROW_LIST, $2); malloc_non_terminal_node(values_node, result->malloc_pool_, T_VALUES_TABLE_EXPRESSION, 1, value_list); - malloc_select_values_stmt($$, result, values_node, $3, NULL); + malloc_select_values_stmt($$, result, values_node, $3, NULL, NULL); } | VALUES values_row_list opt_order_by limit_clause { @@ -13708,7 +13830,7 @@ VALUES values_row_list order_by ParseNode *value_list = NULL; merge_nodes(value_list, result, T_VALUES_ROW_LIST, $2); malloc_non_terminal_node(values_node, result->malloc_pool_, T_VALUES_TABLE_EXPRESSION, 1, value_list); - malloc_select_values_stmt($$, result, values_node, $3, $4); + malloc_select_values_stmt($$, result, values_node, $3, NULL, $4); } ; @@ -22445,6 +22567,39 @@ opt_on_mismatch: } ; +/*=========================================================== + * + * vector index + * + *===========================================================*/ + +vec_index_params: +vec_index_param +{ + $$ = $1; +} +| vec_index_params ',' vec_index_param +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 2, $1, $3); +}; + +vec_index_param: +relation_name COMP_EQ vec_index_param_value +{ + malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 2, $1, $3); +}; + +vec_index_param_value: +INTNUM +{ + $1->type_ = T_NUMBER; + $$ = $1; +} +| relation_name +{ + $$ = $1; +}; + /*=========================================================== * * json query @@ -23099,6 +23254,8 @@ ACCESSID | ALWAYS | ANALYSE | ANY +| APPROX +| APPROXIMATE | APPROX_COUNT_DISTINCT | 
APPROX_COUNT_DISTINCT_SYNOPSIS | APPROX_COUNT_DISTINCT_SYNOPSIS_MERGE @@ -23208,6 +23365,7 @@ ACCESSID | CONTRIBUTORS | COPY | COUNT +| COSINE | CPU | CREATE_TIMESTAMP | CTXCAT @@ -23281,6 +23439,7 @@ ACCESSID | ERRORS | ESCAPE | ESTIMATE +| EUCLIDEAN | EVENTS | EVERY | EXCEPT %prec HIGHER_PARENS @@ -23407,6 +23566,7 @@ ACCESSID | LOGS | LOG_RESTORE_SOURCE | MAJOR +| MANHATTAN | MANUAL | MASTER | MASTER_AUTO_POSITION @@ -23816,6 +23976,8 @@ ACCESSID | VAR_POP | VAR_SAMP | VERBOSE +| VECTOR +| VECTOR_DISTANCE | VIRTUAL_COLUMN_ID | MATERIALIZED | VIEW diff --git a/src/sql/printer/ob_dml_stmt_printer.cpp b/src/sql/printer/ob_dml_stmt_printer.cpp index 46724cd6f7..d7d51ccf76 100644 --- a/src/sql/printer/ob_dml_stmt_printer.cpp +++ b/src/sql/printer/ob_dml_stmt_printer.cpp @@ -2013,6 +2013,18 @@ int ObDMLStmtPrinter::print_order_by() return ret; } +int ObDMLStmtPrinter::print_approx() +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(stmt_) || OB_ISNULL(buf_) || OB_ISNULL(pos_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("stmt_ is NULL or buf_ is NULL or pos_ is NULL", K(ret)); + } else if (stmt_->has_vec_approx()) { + DATA_PRINTF(" approx "); + } + return ret; +} + int ObDMLStmtPrinter::print_limit() { int ret = OB_SUCCESS; diff --git a/src/sql/printer/ob_dml_stmt_printer.h b/src/sql/printer/ob_dml_stmt_printer.h index c6ba47cfe5..bffa34545f 100644 --- a/src/sql/printer/ob_dml_stmt_printer.h +++ b/src/sql/printer/ob_dml_stmt_printer.h @@ -105,6 +105,7 @@ public: int print_semi_info_to_subquery(); int print_where(); int print_order_by(); + int print_approx(); int print_limit(); int print_fetch(); int print_returning(); diff --git a/src/sql/printer/ob_raw_expr_printer.cpp b/src/sql/printer/ob_raw_expr_printer.cpp index e07fe83823..6e63533637 100644 --- a/src/sql/printer/ob_raw_expr_printer.cpp +++ b/src/sql/printer/ob_raw_expr_printer.cpp @@ -865,6 +865,10 @@ int ObRawExprPrinter::print(ObOpRawExpr *expr) SET_SYMBOL_IF_EMPTY("geomcollection"); break; } + case T_FUN_SYS_ARRAY: { + 
SET_SYMBOL_IF_EMPTY("array"); + break; + } default: { ret = OB_ERR_UNEXPECTED; LOG_WARN("unknown expr type", K(ret), "type", get_type_name(type)); diff --git a/src/sql/printer/ob_select_stmt_printer.cpp b/src/sql/printer/ob_select_stmt_printer.cpp index 657370c1ed..8418e1455c 100644 --- a/src/sql/printer/ob_select_stmt_printer.cpp +++ b/src/sql/printer/ob_select_stmt_printer.cpp @@ -386,6 +386,8 @@ int ObSelectStmtPrinter::print_basic_stmt() LOG_WARN("fail to print having", K(ret), K(*stmt_)); } else if (OB_FAIL(print_order_by())) { LOG_WARN("fail to print order by", K(ret), K(*stmt_)); + } else if (OB_FAIL(print_approx())) { + LOG_WARN("fail to print approx", K(ret), K(*stmt_)); } else if (OB_FAIL(print_limit())) { LOG_WARN("fail to print limit", K(ret), K(*stmt_)); } else if (OB_FAIL(print_fetch())) { diff --git a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp index d83a7646dd..bac77817ab 100644 --- a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp +++ b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp @@ -30,6 +30,7 @@ #include "lib/xml/ob_xml_parser.h" #include "lib/xml/ob_xml_util.h" #include "share/table/ob_ttl_util.h" +#include "share/vector_index/ob_plugin_vector_index_util.h" namespace oceanbase { @@ -1542,7 +1543,17 @@ int ObAlterTableResolver::resolve_index_column_list(const ParseNode &node, } else { sort_item.prefix_len_ = 0; } - + if (OB_SUCC(ret)) { + const ObColumnSchemaV2 *column_schema = NULL; + if (is_oracle_mode()) { // oracle mode is not support vector column yet + } else if (OB_NOT_NULL(column_schema = table_schema_->get_column_schema(sort_item.column_name_))) { + if (ob_is_collection_sql_type(column_schema->get_data_type()) && index_keyname_ != VEC_KEY) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support index create on vector column yet", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create index on vector column is"); + } + } + } if (OB_FAIL(ret)) { // do nothing } else if
(index_keyname_ == FTS_KEY) { @@ -1556,6 +1567,11 @@ int ObAlterTableResolver::resolve_index_column_list(const ParseNode &node, SQL_RESV_LOG(WARN, "check fts index constraint fail",K(ret), K(sort_item.column_name_)); } + } else if (index_keyname_ == VEC_KEY) { + // TODO@xiajin + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support alter table to modify vector index yet", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "alter table to modify vector index is"); } else { // spatial index, NOTE resolve_spatial_index_constraint() will set index_keyname ObSEArray resolved_cols; ObAlterTableStmt *alter_table_stmt = get_alter_table_stmt(); diff --git a/src/sql/resolver/ddl/ob_create_index_resolver.cpp b/src/sql/resolver/ddl/ob_create_index_resolver.cpp index 394ac6b0e3..2d3b0d3bd2 100644 --- a/src/sql/resolver/ddl/ob_create_index_resolver.cpp +++ b/src/sql/resolver/ddl/ob_create_index_resolver.cpp @@ -18,6 +18,9 @@ #include "sql/resolver/ddl/ob_create_index_stmt.h" #include "sql/session/ob_sql_session_info.h" #include "sql/ob_sql_utils.h" +#include "share/ob_vec_index_builder_util.h" +#include "share/vector_index/ob_plugin_vector_index_util.h" + namespace oceanbase { using namespace common; @@ -156,6 +159,12 @@ int ObCreateIndexResolver::resolve_index_column_node( SQL_RESV_LOG(WARN, "add session id key failed", K(ret)); } bool cnt_func_index = false; + const bool is_vec_index = (index_keyname_ == INDEX_KEYNAME::VEC_KEY); + if (is_vec_index && index_column_node->num_child_ >= 2) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("multi column of vector index is not support yet", K(ret), K(index_column_node->num_child_)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "multi vector index column is"); + } for (int32_t i = 0; OB_SUCC(ret) && i < index_column_node->num_child_; ++i) { ParseNode *col_node = index_column_node->children_[i]; ObColumnSortItem sort_item; @@ -184,6 +193,17 @@ int ObCreateIndexResolver::resolve_index_column_node( LOG_WARN("not support dynaimic create multivlaue index", K(ret)); 
LOG_USER_ERROR(OB_NOT_SUPPORTED, "not support dynaimic create multivlaue index"); } + if (OB_SUCC(ret)) { + const ObColumnSchemaV2 *column_schema = NULL; + if (is_oracle_mode()) { // oracle mode is not support vector column yet + } else if (OB_NOT_NULL(column_schema = tbl_schema->get_column_schema(sort_item.column_name_))) { + if (ob_is_collection_sql_type(column_schema->get_data_type()) && index_keyname_ != INDEX_KEYNAME::VEC_KEY) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support index create on vector column yet", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create index on vector column is"); + } + } + } } // 前缀索引的前缀长度 if (OB_FAIL(ret)) { @@ -211,6 +231,17 @@ int ObCreateIndexResolver::resolve_index_column_node( SQL_RESV_LOG(WARN, "check fts index constraint fail",K(ret), K(sort_item.column_name_)); } + } else if (index_keyname_ == INDEX_KEYNAME::VEC_KEY) { + if (sort_item.is_func_index_) { + ret = OB_ERR_FUNCTIONAL_INDEX_ON_FIELD; + LOG_WARN("Functional index for vector index is not supported.", K(ret), K(sort_item)); + } else if (OB_FAIL(resolve_vec_index_constraint(*tbl_schema, + *schema_checker_, + sort_item.column_name_, + index_keyname_value, + table_option_node))) { + SQL_RESV_LOG(WARN, "check vec index constraint fail",K(ret), K(sort_item.column_name_)); + } } else { // spatial index, NOTE resolve_spatial_index_constraint() will set index_keyname bool is_explicit_order = (NULL != col_node->children_[2] && 1 != col_node->children_[2]->is_empty_); @@ -503,7 +534,6 @@ int ObCreateIndexResolver::resolve(const ParseNode &parse_tree) stmt_ = crt_idx_stmt; if_not_exist_node = parse_tree.children_[7]; } - // 将session中的信息添写到 stmt 的 arg 中 // 包括 nls_xx_format if (OB_SUCC(ret)) { @@ -653,6 +683,18 @@ int ObCreateIndexResolver::resolve(const ParseNode &parse_tree) LOG_WARN("resolve hints failed", K(ret)); } } + if (OB_SUCC(ret)) { + ObCreateIndexArg &index_arg = crt_idx_stmt->get_create_index_arg(); + if (is_vec_index(index_arg.index_type_)) { + 
index_arg.index_schema_.set_index_params(index_params_); + if (tbl_schema->is_view_table()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("create vector index on view table is not supported", + KR(ret), K(tbl_schema->get_table_name())); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create vector index on view table is"); + } + } + } if (OB_SUCC(ret) && ObSchemaChecker::is_ora_priv_check()) { OZ (schema_checker_->check_ora_ddl_priv(session_info_->get_effective_tenant_id(), @@ -763,6 +805,21 @@ int ObCreateIndexResolver::set_table_option_to_stmt(bool is_partitioned) // set type to fts_index_aux first, append other fts arg later index_arg.index_type_ = INDEX_TYPE_FTS_INDEX_LOCAL; } + } else if (INDEX_KEYNAME::VEC_KEY == index_keyname_) { + uint64_t tenant_data_version = 0; + uint64_t tenant_id = session_info_->get_effective_tenant_id(); + if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get tenant data version failed", K(ret)); + } else if (tenant_data_version < DATA_VERSION_4_3_3_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.3, create vector index on existing table not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.3, vector index"); + } else if (global_) { + // TODO @lhd support global index? 
+ ret = OB_NOT_SUPPORTED; + } else { + index_arg.index_type_ = INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL; + } } index_arg.data_table_id_ = data_table_id_; index_arg.index_table_id_ = index_table_id_; @@ -771,6 +828,7 @@ int ObCreateIndexResolver::set_table_option_to_stmt(bool is_partitioned) index_arg.index_option_.use_bloom_filter_ = use_bloom_filter_; index_arg.index_option_.progressive_merge_num_ = progressive_merge_num_; index_arg.index_option_.index_attributes_set_ = index_attributes_set_; + index_arg.index_option_.parser_name_ = parser_name_; index_arg.with_rowid_ = with_rowid_; index_arg.index_schema_.set_data_table_id(data_table_id_); index_arg.index_schema_.set_table_id(index_table_id_); @@ -778,6 +836,9 @@ int ObCreateIndexResolver::set_table_option_to_stmt(bool is_partitioned) create_index_stmt->set_comment(comment_); create_index_stmt->set_tablespace_id(tablespace_id_); if (OB_FAIL(ret)) { + } else if (INDEX_KEYNAME::VEC_KEY == index_keyname_ && + OB_FAIL(ObVecIndexBuilderUtil::generate_vec_index_name(allocator_, index_arg.index_type_, index_arg.index_name_, index_arg.index_name_))) { + LOG_WARN("generate vec index name failed", K(ret), K(index_arg)); } else if (OB_FAIL(create_index_stmt->set_encryption_str(encryption_))) { LOG_WARN("fail to set encryption str", K(ret)); } diff --git a/src/sql/resolver/ddl/ob_create_table_resolver.cpp b/src/sql/resolver/ddl/ob_create_table_resolver.cpp index f64136e881..e9343993ad 100644 --- a/src/sql/resolver/ddl/ob_create_table_resolver.cpp +++ b/src/sql/resolver/ddl/ob_create_table_resolver.cpp @@ -40,6 +40,7 @@ #include "sql/resolver/cmd/ob_help_resolver.h" #include "lib/charset/ob_template_helper.h" #include "sql/optimizer/ob_optimizer_util.h" +#include "share/vector_index/ob_vector_index_util.h" namespace oceanbase @@ -61,7 +62,8 @@ ObCreateTableResolver::ObCreateTableResolver(ObResolverParams &params) is_temp_table_pk_added_(false), index_arg_(), current_index_name_set_(), - cur_udt_set_id_(0) + cur_udt_set_id_(0), +
vec_index_col_ids_() { } @@ -1308,6 +1310,19 @@ int ObCreateTableResolver::resolve_primary_key_node(const ParseNode &pk_node, SQL_RESV_LOG(WARN, "add primary key part failed", K(ret), K(key_name)); } } + if (OB_SUCC(ret)) { + ObCreateTableStmt *create_table_stmt = static_cast(stmt_); + ObTableSchema &table_schema = create_table_stmt->get_create_table_arg().schema_; + const ObColumnSchemaV2 *column_schema = NULL; + if (is_oracle_mode()) { // oracle mode is not support vector column yet + } else if (OB_NOT_NULL(column_schema = table_schema.get_column_schema(key_name))) { + if (ob_is_collection_sql_type(column_schema->get_data_type())) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support primary key is vector column yet", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create primary key on vector column is"); + } + } + } } } } @@ -2101,7 +2116,8 @@ int ObCreateTableResolver::resolve_table_elements_from_select(const ParseNode &p column_meta.set_type(ObLongTextType); } column.set_meta_type(column_meta); - if (column.is_enum_or_set()) { + if (column.is_enum_or_set() + || column.is_collection()) { // array column if (OB_FAIL(column.set_extended_type_info(expr->get_enum_set_values()))) { LOG_WARN("set enum or set info failed", K(ret), K(*expr)); } @@ -2356,6 +2372,22 @@ int ObCreateTableResolver::generate_index_arg() } else { type = INDEX_TYPE_SPATIAL_LOCAL; } + } else if (VEC_KEY == index_keyname_) { + const int64_t tenant_id = session_info_->get_effective_tenant_id(); + if (tenant_data_version < DATA_VERSION_4_3_3_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.3, vector index not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.3, vector index"); + } else if (global_) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support global vec index now", K(ret)); + } else if (!is_user_tenant(tenant_id)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant is not user tenant vector index not 
supported", K(ret), K(tenant_id)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "not user tenant create vector index is"); + } else { + type = INDEX_TYPE_VEC_ROWKEY_VID_LOCAL; + } } else if (FTS_KEY == index_keyname_) { if (tenant_data_version < DATA_VERSION_4_3_1_0) { ret = OB_NOT_SUPPORTED; @@ -2569,6 +2601,7 @@ int ObCreateTableResolver::resolve_index( ret = OB_INVALID_ARGUMENT; SQL_RESV_LOG(WARN, "invalid argument.", K(ret), K(node->children_)); } else { + vec_index_col_ids_.reset(); for (int64_t i = 0; OB_SUCC(ret) && i < index_node_position_list.size(); ++i) { reset(); index_attributes_set_ = OB_DEFAULT_INDEX_ATTRIBUTES_SET; @@ -2617,6 +2650,7 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) ObColumnSchemaV2 *column_schema = NULL; ObCreateTableStmt *create_table_stmt = static_cast(stmt_); ObTableSchema &tbl_schema = create_table_stmt->get_create_table_arg().schema_; + int64_t vec_index_col_id = 0; if(ObItemType::T_INDEX == node->type_) { //if index_name is not specified, new index_name will be generated //by the first_column_name, so resolve the index_column_list_node firstly. 
@@ -2642,6 +2676,13 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) } bool cnt_func_index_mysql = false; bool is_multi_value_index = false; + const bool is_vec_index = (index_keyname_ == INDEX_KEYNAME::VEC_KEY); + if (OB_FAIL(ret)) { + } else if (is_vec_index && index_column_list_node->num_child_ >= 2) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("multi column of vector index is not support yet", K(ret), K(index_column_list_node->num_child_)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "multi vector index column is"); + } for (int32_t i = 0; OB_SUCC(ret) && i < index_column_list_node->num_child_; ++i) { ObString &column_name = sort_item.column_name_; if (NULL == index_column_list_node->children_[i] @@ -2650,14 +2691,16 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) SQL_RESV_LOG(WARN, "invalid index_column_list_node.", K(ret)); } else { index_column_node = index_column_list_node->children_[i]; + } + if (OB_SUCC(ret)) { if (OB_ISNULL(index_column_node->children_) || index_column_node->num_child_ < 3 || OB_ISNULL(index_column_node->children_[0])) { ret = OB_ERR_UNEXPECTED; SQL_RESV_LOG(WARN, "invalid index_column_node.", K(ret), - K(index_column_node->num_child_), - K(index_column_node->children_), - K(index_column_node->children_[0])); + K(index_column_node->num_child_), + K(index_column_node->children_), + K(index_column_node->children_[0])); } else { //column_name if (index_column_node->children_[0]->type_ != T_IDENT) { @@ -2735,6 +2778,9 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) if (ob_is_geometry(expr->get_data_type()) || static_cast(INDEX_KEYNAME::SPATIAL_KEY) == node->value_) { ret = OB_ERR_SPATIAL_FUNCTIONAL_INDEX; LOG_WARN("Spatial functional index is not supported.", K(ret), K(column_name)); + } else if (ob_is_collection_sql_type(expr->get_data_type()) || static_cast(INDEX_KEYNAME::VEC_KEY) == node->value_) { + ret = OB_ERR_FUNCTIONAL_INDEX_ON_FIELD; + LOG_WARN("Functional index for vector 
index is not supported.", K(ret), K(column_name)); } else if (OB_FAIL(ObIndexBuilderUtil::generate_ordinary_generated_column(*expr, session_info_->get_sql_mode(), tbl_schema, @@ -2764,6 +2810,15 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, column_name.length(), column_name.ptr()); } } + if (OB_FAIL(ret)) { + } else if (is_vec_index) { + vec_index_col_id = column_schema->get_column_id(); + if (ObVectorIndexUtil::has_multi_index_on_same_column(vec_index_col_ids_, vec_index_col_id)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("more than one vector index on same column is not supported", K(ret), K(vec_index_col_id), K(vec_index_col_ids_)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "more than one vector index on same column is"); + } + } if (OB_SUCC(ret)) { if (OB_ISNULL(session_info_)) { ret = OB_NOT_INIT; @@ -2771,6 +2826,12 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) } else if (sort_item.prefix_len_ > column_schema->get_data_length()) { ret = OB_WRONG_SUB_KEY; SQL_RESV_LOG(WARN, "prefix length is longer than column length", K(sort_item), K(column_schema->get_data_length()), K(ret)); + } else if (!is_oracle_mode // oracle mode is not support vector column yet + && ob_is_collection_sql_type(column_schema->get_data_type()) + && static_cast(INDEX_KEYNAME::VEC_KEY) != node->value_) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("index column is vector column, but is not vector index is not supported", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "vector column index but not vector index is"); } else if (ob_is_text_tc(column_schema->get_data_type()) && static_cast(INDEX_KEYNAME::FTS_KEY) != node->value_) { if (column_schema->is_hidden()) { @@ -2785,6 +2846,10 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) index_column_list_node->num_child_, node->value_, is_oracle_mode, NULL != index_column_node->children_[2] && 1 != index_column_node->children_[2]->is_empty_))) { 
SQL_RESV_LOG(WARN, "fail to resolve spatial index constraint", K(ret), K(column_name)); + } else if (OB_FAIL(resolve_vec_index_constraint(*column_schema, + node->value_, + node->children_[2]))) { + SQL_RESV_LOG(WARN, "fail to resolve vec index constraint", K(ret), K(column_name)); } else if (OB_FAIL(resolve_fts_index_constraint(*column_schema, node->value_))) { SQL_RESV_LOG(WARN, "fail to resolve fts index constraint", K(ret), K(column_name)); @@ -3020,7 +3085,21 @@ int ObCreateTableResolver::resolve_index_node(const ParseNode *node) } } if (OB_SUCC(ret)) { - if (is_fts_index(index_arg_.index_type_)) { + if (is_vec_index(index_arg_.index_type_)) { + // set index_params to create_index_arg, then will pass to index_arg_list + create_index_arg.index_schema_.set_index_params(index_params_); + if (OB_FAIL(ObDDLResolver::append_vec_args(resolve_result, + create_index_arg, + have_generate_vec_arg_, + resolve_results, + index_arg_list, + allocator_, + session_info_))) { + LOG_WARN("failed to append vec args", K(ret)); + } else if (OB_FAIL(vec_index_col_ids_.push_back(vec_index_col_id))) { + LOG_WARN("fail to push back vec index col id", K(ret)); + } + } else if (is_fts_index(index_arg_.index_type_)) { if (OB_FAIL(ObDDLResolver::append_fts_args(resolve_result, create_index_arg, have_generate_fts_arg_, diff --git a/src/sql/resolver/ddl/ob_create_table_resolver.h b/src/sql/resolver/ddl/ob_create_table_resolver.h index b6318b7c9c..a57d43de2e 100644 --- a/src/sql/resolver/ddl/ob_create_table_resolver.h +++ b/src/sql/resolver/ddl/ob_create_table_resolver.h @@ -152,6 +152,7 @@ private: common::ObSEArray constraint_exprs_;//store constraint exprs uint64_t cur_udt_set_id_; + common::ObSEArray vec_index_col_ids_; }; } // end namespace sql diff --git a/src/sql/resolver/ddl/ob_create_view_resolver.cpp b/src/sql/resolver/ddl/ob_create_view_resolver.cpp index 4c1988df2c..08e571ad2e 100644 --- a/src/sql/resolver/ddl/ob_create_view_resolver.cpp +++ 
b/src/sql/resolver/ddl/ob_create_view_resolver.cpp @@ -577,6 +577,10 @@ int ObCreateViewResolver::resolve_primary_key_node(ParseNode &pk_node, table_schema, i, pk_data_length, col))) { LOG_WARN("failed to add primary key part", K(ret), K(i)); + } else if (!is_oracle_mode() && ob_is_collection_sql_type(col->get_data_type())) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support primary key is vector column yet", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create primary key on vector column is"); } } if (OB_FAIL(ret) || is_oracle_mode()) { @@ -1768,7 +1772,8 @@ int ObCreateViewResolver::fill_column_meta_infos(const ObRawExpr &expr, column.set_nullable(expr.get_result_type().is_not_null_for_read() ? false : true); } if (OB_FAIL(ret)) { - } else if (column.is_enum_or_set() && OB_FAIL(column.set_extended_type_info(expr.get_enum_set_values()))) { + } else if ((column.is_enum_or_set() || column.is_collection()) + && OB_FAIL(column.set_extended_type_info(expr.get_enum_set_values()))) { LOG_WARN("set enum or set info failed", K(ret), K(expr)); } else if (OB_FAIL(adjust_string_column_length_within_max(column, lib::is_oracle_mode()))) { LOG_WARN("failed to adjust string column length within max", K(ret), K(expr)); @@ -1802,7 +1807,8 @@ int ObCreateViewResolver::resolve_column_default_value(const sql::ObSelectStmt * LOG_WARN("failed to resolve default value", K(ret)); } else if (OB_FAIL(ob_write_obj(alloc, column_item.default_value_, res_obj))) { LOG_WARN("failed to write obj", K(ret)); - } else if (ob_is_enum_or_set_type(column_item.default_value_.get_type())) { + } else if (ob_is_enum_or_set_type(column_item.default_value_.get_type()) + || ob_is_collection_sql_type(column_item.default_value_.get_type())) { if (OB_FAIL(column_schema.set_extended_type_info(select_item.expr_->get_enum_set_values()))) { LOG_WARN("failed to set extended type info", K(ret)); } diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.cpp b/src/sql/resolver/ddl/ob_ddl_resolver.cpp index 
30a55ebeb8..466ad28d27 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.cpp +++ b/src/sql/resolver/ddl/ob_ddl_resolver.cpp @@ -44,6 +44,7 @@ #include "sql/engine/expr/ob_expr_lob_utils.h" #include "pl/ob_pl_stmt.h" #include "share/table/ob_ttl_util.h" +#include "share/ob_vec_index_builder_util.h" #include "common/ob_smart_call.h" namespace oceanbase { @@ -119,6 +120,7 @@ ObDDLResolver::ObDDLResolver(ObResolverParams ¶ms) have_generate_fts_arg_(false), is_set_lob_inrow_threshold_(false), lob_inrow_threshold_(OB_DEFAULT_LOB_INROW_THRESHOLD), + have_generate_vec_arg_(false), auto_increment_cache_size_(0), external_table_format_type_(ObExternalFileFormat::INVALID_FORMAT), mocked_external_table_column_ids_(), @@ -131,6 +133,60 @@ ObDDLResolver::~ObDDLResolver() { } +int ObDDLResolver::append_vec_args( + const ObPartitionResolveResult &resolve_result, + const obrpc::ObCreateIndexArg &index_arg, + bool &vec_common_aux_table_exist, + ObIArray &resolve_results, + ObIArray &index_arg_list, + ObIAllocator *allocator, + const ObSQLSessionInfo *session_info) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(allocator)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("allocator is null", K(ret)); + } else if (!vec_common_aux_table_exist) { + const int64_t num_vec_args = 5; + // append 3号表 first + if (OB_FAIL(ObVecIndexBuilderUtil::append_vec_delta_buffer_arg(index_arg, allocator, session_info, index_arg_list))) { + LOG_WARN("failed to append vec delta_buffer_table arg", K(ret)); + } else if (OB_FAIL(ObVecIndexBuilderUtil::append_vec_rowkey_vid_arg(index_arg, allocator, index_arg_list))) { + LOG_WARN("failed to append vec rowkey_vid_table arg", K(ret)); + } else if (OB_FAIL(ObVecIndexBuilderUtil::append_vec_vid_rowkey_arg(index_arg, allocator, index_arg_list))) { + LOG_WARN("failed to append vec vid_rowkey_table arg", K(ret)); + } else if (OB_FAIL(ObVecIndexBuilderUtil::append_vec_index_id_arg(index_arg, allocator, index_arg_list))) { + LOG_WARN("failed to append vec index_id_table arg", 
K(ret)); + } else if (OB_FAIL(ObVecIndexBuilderUtil::append_vec_index_snapshot_data_arg(index_arg, allocator, index_arg_list))) { + LOG_WARN("failed to append vec index_snapshot_data_table arg", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < num_vec_args; ++i) { + if (OB_FAIL(resolve_results.push_back(resolve_result))) { + LOG_WARN("fail to push back index_stmt_list", K(ret), K(resolve_result)); + } + } + if (OB_SUCC(ret)) { + vec_common_aux_table_exist = true; + } + } else { + const int64_t num_vec_args = 3; // 如果一个主表中已经创建过向量索引,那么只需要新增 3 张非共享索引辅助表 + if (OB_FAIL(ObVecIndexBuilderUtil::append_vec_delta_buffer_arg(index_arg, allocator, session_info, index_arg_list))) { + LOG_WARN("failed to append vec delta_buffer_table arg", K(ret)); + } else if (OB_FAIL(ObVecIndexBuilderUtil::append_vec_index_id_arg(index_arg, allocator, index_arg_list))) { + LOG_WARN("failed to append vec index_id_table arg", K(ret)); + } else if (OB_FAIL(ObVecIndexBuilderUtil::append_vec_index_snapshot_data_arg(index_arg, allocator, index_arg_list))) { + LOG_WARN("failed to append vec index_snapshot_data_table arg", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < num_vec_args; ++i) { + if (OB_FAIL(resolve_results.push_back(resolve_result))) { + LOG_WARN("fail to push back index_stmt_list", K(ret), K(resolve_result)); + } + } + } + LOG_DEBUG("finish append vec index args", K(index_arg), K(index_arg_list)); + return ret; +} + int ObDDLResolver::append_fts_args( const ObPartitionResolveResult &resolve_result, const obrpc::ObCreateIndexArg *index_arg, @@ -1753,7 +1809,11 @@ int ObDDLResolver::resolve_table_option(const ParseNode *option_node, const bool case T_STORING_COLUMN_LIST: { ParseNode *cur_node = NULL; ObString column_name; - if (OB_ISNULL(option_node->children_[0]) || + if (INDEX_KEYNAME::VEC_KEY == index_keyname_) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "vector index not support storing column", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "setting vector index 
storing column is"); + } else if (OB_ISNULL(option_node->children_[0]) || T_STORING_COLUMN_LIST != option_node->type_ || option_node->num_child_ <1) { ret = OB_ERR_UNEXPECTED; @@ -1793,6 +1853,34 @@ int ObDDLResolver::resolve_table_option(const ParseNode *option_node, const bool } break; } + case T_VEC_INDEX_PARAMS: { + ObString tmp_str; + int32_t index_param_length = option_node->str_len_; + const char *index_param_str = option_node->str_value_; + if (index_keyname_ != VEC_KEY) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "index params was set in not vector index is not supported", K(ret), K(index_param_length)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "set index params in not vector index is"); + } else if (OB_UNLIKELY(index_param_length > OB_MAX_INDEX_PARAMS_LENGTH)) { + ret = common::OB_ERR_TOO_LONG_IDENT; + SQL_RESV_LOG(WARN, "index params length is beyond limit", K(ret), K(index_param_length)); + LOG_USER_ERROR(OB_ERR_TOO_LONG_IDENT, index_param_length, index_param_str); + } else if (0 == index_param_length) { + ret = OB_OP_NOT_ALLOW; + SQL_RESV_LOG(WARN, "set index param empty is not allowed now", K(ret)); + LOG_USER_ERROR(OB_OP_NOT_ALLOW, "set index params empty is"); + } else { + tmp_str.assign_ptr(index_param_str, index_param_length); + if (OB_ISNULL(option_node->children_[0])) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(ERROR,"children can't be null", K(ret)); + } else if (OB_FAIL(ObVectorIndexUtil::insert_index_param_str(tmp_str, *allocator_, index_params_))) { + SQL_RESV_LOG(WARN, "write string failed", K(ret), K(tmp_str), K(index_params_)); + } else if (OB_FAIL(check_index_param(option_node, index_params_))) { + LOG_WARN("fail to check vector index definition", K(ret)); + } + } + } case T_PARSER_NAME: { if (OB_ISNULL(option_node->children_[0])) { ret = OB_ERR_UNEXPECTED; @@ -3589,6 +3677,12 @@ int ObDDLResolver::resolve_column_definition(ObColumnSchemaV2 &column, if (OB_SUCC(ret) && column.is_geometry() && 
OB_FAIL(column.set_geo_type(type_node->int32_values_[1]))) { SQL_RESV_LOG(WARN, "fail to set geometry sub type", K(ret), K(column)); } + + if (OB_SUCC(ret) && column.is_collection()) { + if (OB_FAIL(resolve_collection_column(type_node, column))) { + LOG_WARN("fail to resolve set column", K(ret), K(column)); + } + } } } if (OB_SUCC(ret)) { @@ -3957,10 +4051,10 @@ int ObDDLResolver::resolve_normal_column_attribute_constr_default(ObColumnSchema LOG_USER_ERROR(OB_INVALID_DEFAULT, column.get_column_name_str().length(), column.get_column_name_str().ptr()); SQL_RESV_LOG(WARN, "BLOB, TEXT column can't have a default value", K(column), K(default_value), K(ret)); } else if (!default_value.is_null() - && (ob_is_json_tc(column.get_data_type()) || ob_is_geometry_tc(column.get_data_type()))) { + && (ob_is_json_tc(column.get_data_type()) || ob_is_geometry_tc(column.get_data_type()) || ob_is_collection_sql_type(column.get_data_type()))) { ret = OB_ERR_BLOB_CANT_HAVE_DEFAULT; LOG_USER_ERROR(OB_ERR_BLOB_CANT_HAVE_DEFAULT, column.get_column_name_str().length(), column.get_column_name_str().ptr()); - SQL_RESV_LOG(WARN, "JSON or GEOM column can't have a default value", K(column), + SQL_RESV_LOG(WARN, "JSON or GEOM or ARRAY column can't have a default value", K(column), K(default_value), K(ret)); } else { if (T_CONSTR_DEFAULT == attr_node->type_) { @@ -4039,7 +4133,11 @@ int ObDDLResolver::resolve_normal_column_attribute(ObColumnSchemaV2 &column, resolve_stat.is_primary_key_ = true; // primary key should not be nullable column.set_nullable(false); - if (ob_is_text_tc(column.get_data_type())) { + if (ob_is_collection_sql_type(column.get_data_type())) { + ret = OB_ERR_WRONG_KEY_COLUMN; + LOG_USER_ERROR(OB_ERR_WRONG_KEY_COLUMN, column.get_column_name_str().length(), column.get_column_name_str().ptr()); + SQL_RESV_LOG(WARN, "VECTOR, TEXT column can't be primary key", K(column), K(ret)); + } else if (ob_is_text_tc(column.get_data_type())) { ret = OB_ERR_WRONG_KEY_COLUMN; 
LOG_USER_ERROR(OB_ERR_WRONG_KEY_COLUMN, column.get_column_name_str().length(), column.get_column_name_str().ptr()); SQL_RESV_LOG(WARN, "BLOB, TEXT column can't be primary key", K(column), K(ret)); @@ -5172,7 +5270,8 @@ int ObDDLResolver::resolve_tablespace_node(const ParseNode *node, int64_t &table return ret; } -int ObDDLResolver::cast_default_value(ObObj &default_value, +int ObDDLResolver::cast_default_value(ObSQLSessionInfo *session_info, + ObObj &default_value, const ObTimeZoneInfo *tz_info, const common::ObString *nls_formats, ObIAllocator &allocator, @@ -5209,6 +5308,7 @@ int ObDDLResolver::cast_default_value(ObObj &default_value, ObCastCtx cast_ctx(&allocator, &dtc_params, CUR_TIME, cast_mode, column_schema.get_collation_type(), NULL, &res_accuracy); + cast_ctx.exec_ctx_ = session_info->get_cur_exec_ctx(); if (ob_is_enumset_tc(column_schema.get_data_type())) { if (OB_FAIL(cast_enum_or_set_default_value(column_schema, cast_ctx, default_value))) { LOG_WARN("fail to cast enum or set default value", K(default_value), K(column_schema), K(ret)); @@ -5471,6 +5571,7 @@ void ObDDLResolver::reset() { is_set_lob_inrow_threshold_ = false; lob_inrow_threshold_ = OB_DEFAULT_LOB_INROW_THRESHOLD; auto_increment_cache_size_ = 0; + index_params_.reset(); } bool ObDDLResolver::is_valid_prefix_key_type(const ObObjTypeClass column_type_class) @@ -6199,6 +6300,34 @@ int ObDDLResolver::check_partition_name_duplicate(ParseNode *node, bool is_oracl return ret; } + +int ObDDLResolver::resolve_collection_column(const ParseNode *type_node, ObColumnSchemaV2 &column) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(type_node) + || OB_UNLIKELY(type_node->num_child_ != 1) + || OB_ISNULL(allocator_) + || OB_ISNULL(session_info_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("type node is NULL", K(ret), K(type_node), K(session_info_)); + } else if (OB_ISNULL(type_node->children_[0])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("child is NULL", K(ret)); + } else { + ObArray type_info_array; + ObStringBuffer 
buf(allocator_); + uint8_t depth = 0; + if (OB_FAIL(ObResolverUtils::resolve_collection_type_info(*type_node, buf, depth))) { + LOG_WARN("failed to resolve collection type info", K(ret)); + } else if (OB_FAIL(type_info_array.push_back(buf.string()))) { + LOG_WARN("fail to push back type info", K(ret)); + } else if (OB_FAIL(column.set_extended_type_info(type_info_array))) { + LOG_WARN("set enum or set info failed", K(ret)); + } + } + return ret; +} + int ObDDLResolver::fill_extended_type_info(const ParseNode &str_list_node, ObColumnSchemaV2 &column) { int ret = OB_SUCCESS; @@ -6562,7 +6691,9 @@ int ObDDLResolver::check_default_value(ObObj &default_value, { int ret = OB_SUCCESS; ObArray dummy_array; - SMART_VAR(ObSQLSessionInfo, empty_session) { + SMART_VARS_3((sql::ObSQLSessionInfo, empty_session), (ObExecContext, exec_ctx, allocator), + (ObPhysicalPlanCtx, phy_plan_ctx, allocator)) { + LinkExecCtxGuard link_guard(empty_session, exec_ctx); if (OB_FAIL(init_empty_session(tz_info_wrap, nls_formats, local_session_var, @@ -6573,11 +6704,14 @@ int ObDDLResolver::check_default_value(ObObj &default_value, empty_session))) { LOG_WARN("failed to init empty session", K(ret)); } else if (FALSE_IT(empty_session.set_stmt_type(stmt::T_CREATE_TABLE))) { // set a fake ddl stmt type to specifiy ddl stmt type + } else if (FALSE_IT(exec_ctx.set_physical_plan_ctx(&phy_plan_ctx))) { + } else if (FALSE_IT(exec_ctx.set_my_session(&empty_session))) { } else if (OB_FAIL(check_default_value(default_value, tz_info_wrap, nls_formats, allocator, table_schema, dummy_array,column, gen_col_expr_arr, sql_mode, &empty_session, allow_sequence, schema_checker))) { LOG_WARN("check default value failed", K(ret)); } + exec_ctx.set_physical_plan_ctx(NULL); } return ret; } @@ -6735,7 +6869,7 @@ int ObDDLResolver::check_default_value(ObObj &default_value, LOG_DEBUG("finish check default value", K(input_default_value), K(expr_str), K(tmp_default_value), K(tmp_dest_obj), K(tmp_dest_obj_null), KPC(expr), 
K(ret)); } else { bool is_oracle_mode = false; - if (OB_FAIL(cast_default_value(default_value, tz_info_wrap.get_time_zone_info(), + if (OB_FAIL(cast_default_value(session_info, default_value, tz_info_wrap.get_time_zone_info(), nls_formats, allocator, column, sql_mode))) { LOG_WARN("fail to cast default value!", K(ret), K(default_value), KPC(tz_info_wrap.get_time_zone_info()), K(column), K(sql_mode)); } else if (OB_FAIL(ObCompatModeGetter::check_is_oracle_mode_with_tenant_id(table_schema.get_tenant_id(), is_oracle_mode))) { @@ -7555,6 +7689,83 @@ int ObDDLResolver::resolve_spatial_index_constraint( return ret; } +int ObDDLResolver::resolve_vec_index_constraint( + const share::schema::ObColumnSchemaV2 &column_schema, + const int64_t index_keyname_value, + ParseNode *node) +{ + int ret = OB_SUCCESS; + if (!column_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argumnet", K(ret), K(column_schema)); + } else { + bool is_vec_index = (index_keyname_value == static_cast(INDEX_KEYNAME::VEC_KEY)); + uint64_t tenant_id = column_schema.get_tenant_id(); + bool is_collection_column = ob_is_collection_sql_type(column_schema.get_data_type()); + uint64_t tenant_data_version = 0; + const int64_t MAX_DIM_LIMITED = 2000; + bool is_vector_memory_valid = false; + int64_t dim = 0; + if (!is_vec_index) { + // do nothing + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get tenant data version failed", K(ret)); + } else if (tenant_data_version < DATA_VERSION_4_3_3_0) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("tenant data version is less than 4.3.3, vector index not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.3.3, vector index"); + } else if (!is_collection_column) { + ret = OB_ERR_BAD_VEC_INDEX_COLUMN; + LOG_USER_ERROR(OB_ERR_BAD_VEC_INDEX_COLUMN, + column_schema.get_column_name_str().length(), + column_schema.get_column_name_str().ptr()); + LOG_WARN("vector 
index can only be built on vector column", K(ret), K(column_schema)); + } else if (OB_FAIL(ObVectorIndexUtil::get_vector_dim_from_extend_type_info(column_schema.get_extended_type_info(), dim))) { + LOG_WARN("fail to get vector dim", K(ret), K(column_schema)); + } else if (dim > MAX_DIM_LIMITED) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("vector index dim larger than 2000 is not supported", K(ret), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "vec index dim larger than 2000 is"); + } else if (OB_FAIL(ObPluginVectorIndexHelper::is_ob_vector_memory_valid(session_info_->get_effective_tenant_id(), is_vector_memory_valid))) { + LOG_WARN("fail to check is_ob_vector_memory_valid", K(ret)); + } else if (!is_vector_memory_valid) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support vector index when ob_vector_memory_limit_percentage is 0", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "when ob_vector_memory_limit_percentage = 0 or memsotre_limit >= 85, vector index is"); + } else { + index_keyname_ = VEC_KEY; + ParseNode *option_node = NULL; + bool has_set_params = false; + if (OB_ISNULL(node)) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "vector index params not set is"); + } else if(T_TABLE_OPTION_LIST != node->type_ || node->num_child_ < 1) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "invalid parse node", KR(ret), K(node->type_), K(node->num_child_)); + } else if (OB_ISNULL(node->children_)) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "vector index params not set is"); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < node->num_child_; ++i) { + if (OB_ISNULL(option_node = node->children_[i])) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "vector index params not set is"); + } else if (option_node->type_ != T_VEC_INDEX_PARAMS) { + } else { + has_set_params = true; + } + } + } + if (OB_SUCC(ret) && has_set_params) { + } else { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "vector index 
params not set is"); + } + } + } + return ret; +} + int ObDDLResolver::resolve_fts_index_constraint( const share::schema::ObTableSchema &table_schema, const common::ObString &column_name, @@ -7580,6 +7791,44 @@ int ObDDLResolver::resolve_fts_index_constraint( return ret; } +int ObDDLResolver::resolve_vec_index_constraint( + const share::schema::ObTableSchema &table_schema, + ObSchemaChecker &schema_checker, + const common::ObString &column_name, + const int64_t index_keyname_value, + ParseNode *node) +{ + int ret = OB_SUCCESS; + const ObColumnSchemaV2 *column_schema = NULL; + bool is_column_has_vector_index = false; + if (!table_schema.is_valid() || column_name.empty()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argumnet", K(ret), K(table_schema), K(column_name)); + } else if (OB_ISNULL(session_info_) || OB_ISNULL(allocator_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret), K(session_info_), K(allocator_)); + } else if (OB_ISNULL(column_schema = table_schema.get_column_schema(column_name))) { + ret = OB_ERR_KEY_COLUMN_DOES_NOT_EXITS; + LOG_USER_ERROR(OB_ERR_KEY_COLUMN_DOES_NOT_EXITS, + column_name.length(), + column_name.ptr()); + } else if (OB_FAIL(ObVectorIndexUtil::check_column_has_vector_index(table_schema, + *schema_checker.get_schema_guard(), + column_schema->get_column_id(), + is_column_has_vector_index))) { + LOG_WARN("resolve vec index constraint fail", K(ret), K(index_keyname_value)); + } else if (is_column_has_vector_index) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("create vector index on column has vector index is not supported", K(ret), K(column_name)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create vector index on column has vector index is"); + } else if (OB_FAIL(resolve_vec_index_constraint(*column_schema, + index_keyname_value, + node))) { + LOG_WARN("resolve vec index constraint fail", K(ret), K(index_keyname_value)); + } + return ret; +} + // Fts index can only be built on text columns. 
// CREATE TABLE fts_index_constraint (id int, // title varchar(100), @@ -8437,6 +8686,11 @@ int ObDDLResolver::generate_global_index_schema( } else if (OB_FAIL(share::ObIndexBuilderUtil::adjust_expr_index_args( my_create_index_arg, new_table_schema, *allocator_, gen_columns))) { LOG_WARN("fail to adjust expr index args", K(ret)); + } else if (share::schema::is_vec_index(my_create_index_arg.index_type_) && + OB_FAIL((ObVecIndexBuilderUtil::generate_vec_index_name(allocator_, my_create_index_arg.index_type_, + my_create_index_arg.index_name_, + my_create_index_arg.index_name_)))) { + LOG_WARN("failed to genearte vec parser name", K(ret)); } else if (OB_FAIL(do_generate_global_index_schema( my_create_index_arg, new_table_schema))) { LOG_WARN("fail to do generate global index schema", K(ret)); @@ -12799,6 +13053,195 @@ int ObDDLResolver::resolve_column_skip_index( return ret; } +int ObDDLResolver::check_index_param(const ParseNode *option_node, ObString &index_params) +{ + + int ret = OB_SUCCESS; + if (OB_ISNULL(option_node) ) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(ERROR, "unexpected parse node", K(ret), KP(option_node)); + } else if (option_node->type_ != T_VEC_INDEX_PARAMS) { + ret = OB_ERR_UNEXPECTED; + SQL_RESV_LOG(ERROR, "unexpected parse node type", K(ret), K(option_node->type_)); + } else if (OB_ISNULL(option_node->children_[0])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("option_node child is null", K(ret), KP(option_node->children_[0])); + } else { + if (option_node->num_child_ < 4 || option_node->num_child_ % 2 != 0) { // at least distance and type should be set + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "invalid vector param num", K(ret), K(option_node->num_child_)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "vector index params not set distance and type is"); + } + ObString last_variable; + ObString parser_name; + ObString new_variable_name; + ObString new_parser_name; + int64_t parser_value = 0; + int32_t str_len = 0; + int64_t m_value = 0; + int64_t 
ef_construction_value = 0; + + bool distance_is_set = false; + bool lib_is_set = false; + bool type_is_set = false; + bool m_is_set = false; + bool ef_construction_is_set = false; + bool ef_search_is_set = false; + + const ObString default_lib = "VSAG"; + const int64_t default_m_value = 16; + const int64_t default_ef_construction_value = 200; + const int64_t default_ef_search_value = 64; + + for (int64_t i = 0; OB_SUCC(ret) && i < option_node->num_child_; ++i) { + int32_t child_node_index = i % 2; + if (child_node_index == 0) { + str_len = static_cast(option_node->children_[i]->str_len_); + parser_name_.assign_ptr(option_node->children_[i]->str_value_, str_len); + new_variable_name = parser_name_; + if (OB_FAIL(ob_simple_low_to_up(*allocator_, parser_name_, new_variable_name))) { + LOG_WARN("string low to up failed", K(ret), K(parser_name_)); + } else if (new_variable_name != "DISTANCE" && + new_variable_name != "LIB" && + new_variable_name != "TYPE" && + new_variable_name != "M" && + new_variable_name != "EF_CONSTRUCTION" && + new_variable_name != "EF_SEARCH") { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "unexpected vector variable name", K(ret), K(new_variable_name)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "unexpected vector index params items is"); + } else { + last_variable = new_variable_name; + } + } else { + if (option_node->children_[i]->type_ == T_NUMBER) { + parser_value = option_node->children_[i]->value_; + } else { + str_len = static_cast(option_node->children_[i]->str_len_); + parser_name.assign_ptr(option_node->children_[i]->str_value_, str_len); + new_parser_name = parser_name; + if (OB_FAIL(ob_simple_low_to_up(*allocator_, parser_name, new_parser_name))) { + SQL_RESV_LOG(WARN, "string low to up failed", K(ret), K(parser_name)); + } + } + if (OB_FAIL(ret)) { + } else if (last_variable == "DISTANCE") { + if (new_parser_name == "INNER_PRODUCT" || + new_parser_name == "L2") { + distance_is_set = true; + } else { + ret = OB_NOT_SUPPORTED; + 
SQL_RESV_LOG(WARN, "not support vector index distance algorithm", K(ret), K(new_parser_name)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "this type of vector index distance algorithm is"); + } + } else if (last_variable == "LIB") { + if (new_parser_name == "VSAG") { + lib_is_set = true; + } else { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "not support vector index lib", K(ret), K(new_parser_name)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "this type of vector index lib is"); + } + } else if (last_variable == "TYPE") { + if (new_parser_name == "HNSW") { + type_is_set = true; + } else { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "not support vector index type", K(ret), K(new_parser_name)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "this type of vector index type is"); + } + } else if (last_variable == "M") { + if (parser_value >= 5 && parser_value <= 64 ) { + m_is_set = true; + m_value = parser_value; + } else { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "invalid vector index m value", K(ret), K(parser_value)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "this value of vector index m is"); + } + } else if (last_variable == "EF_CONSTRUCTION") { + if (parser_value >= 5 && parser_value <= 1000 ) { + ef_construction_is_set = true; + ef_construction_value = parser_value; + } else { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "invalid vector index ef_construction value", K(ret), K(parser_value)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "this value of vector index ef_construction is"); + } + } else if (last_variable == "EF_SEARCH") { + if (parser_value >= 1 && parser_value <= 1000 ) { + ef_search_is_set = true; + } else { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "invalid vector index ef_search value", K(ret), K(parser_value)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "this value of vector index ef_search is"); + } + } else { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "not support vector index param", K(ret), K(last_variable)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "this value of vector 
index ef_search is"); + } + } + } + if (OB_SUCC(ret)) { + ef_construction_value = ef_construction_is_set ? ef_construction_value : default_ef_construction_value; + m_value = m_is_set ? m_value : default_m_value; + if (!distance_is_set || !type_is_set) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "unexpected setting of vector index param, distance or type has not been set", + K(ret), K(distance_is_set), K(type_is_set)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "the vector index params of distance or type not set is"); + } else if (ef_construction_value <= m_value) { + ret = OB_NOT_SUPPORTED; + SQL_RESV_LOG(WARN, "unexpected setting of vector index param, ef_construction value must be larger than m value", + K(ret), K(ef_construction_value), K(m_value)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "the vector index params ef_construction less than or equal to m value is"); + } else { + char not_set_params_str[OB_MAX_TABLE_NAME_LENGTH]; + int64_t pos = 0; + if (!lib_is_set && OB_FAIL(databuff_printf(not_set_params_str, + OB_MAX_TABLE_NAME_LENGTH, + pos, + ", LIB=%.*s", + default_lib.length(), + default_lib.ptr()))) { + LOG_WARN("fail to printf databuff", K(ret)); + } else if (!m_is_set && OB_FAIL(databuff_printf(not_set_params_str, + OB_MAX_TABLE_NAME_LENGTH, + pos, + ", M=%ld", + default_m_value))) { + LOG_WARN("fail to printf databuff", K(ret)); + } else if (!ef_construction_is_set && OB_FAIL(databuff_printf(not_set_params_str, + OB_MAX_TABLE_NAME_LENGTH, + pos, + ", EF_CONSTRUCTION=%ld", + default_ef_construction_value))) { + LOG_WARN("fail to printf databuff", K(ret)); + } else if (!ef_search_is_set && OB_FAIL(databuff_printf(not_set_params_str, + OB_MAX_TABLE_NAME_LENGTH, + pos, + ", EF_SEARCH=%ld", + default_ef_search_value))) { + LOG_WARN("fail to printf databuff", K(ret)); + } else { + char *buf = nullptr; + const int64_t alloc_len = index_params.length() + pos; + if (OB_ISNULL(buf = (static_cast(allocator_->alloc(alloc_len))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + 
LOG_WARN("fail to alloc memory for vector index param", K(ret), K(alloc_len)); + } else { + MEMCPY(buf, index_params.ptr(), index_params.length()); + MEMCPY(buf + index_params.length(), not_set_params_str, pos); + index_params.assign_ptr(buf, alloc_len); + } + } + } + } + } + return ret; +} + int ObDDLResolver::check_skip_index(share::schema::ObTableSchema &table_schema) { int ret = OB_SUCCESS; diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.h b/src/sql/resolver/ddl/ob_ddl_resolver.h index 9ee2861e6f..db429ba77f 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.h +++ b/src/sql/resolver/ddl/ob_ddl_resolver.h @@ -172,6 +172,15 @@ public: static const int64_t DEFAULT_TABLE_DOP = 1; explicit ObDDLResolver(ObResolverParams ¶ms); virtual ~ObDDLResolver(); + + static int append_vec_args( + const ObPartitionResolveResult &resolve_result, + const obrpc::ObCreateIndexArg &index_arg, + bool &fts_common_aux_table_exist, + ObIArray &resolve_results, + ObIArray &index_arg_list, + ObIAllocator *allocator, + const ObSQLSessionInfo *session_info); static int append_fts_args( const ObPartitionResolveResult &resolve_result, const obrpc::ObCreateIndexArg *index_arg, @@ -246,6 +255,7 @@ public: const share::schema::ObColumnSchemaV2 &column, common::ObObj &default_value); static int cast_default_value( + ObSQLSessionInfo *session_info, common::ObObj &default_value, const common::ObTimeZoneInfo *tz_info, const common::ObString *nls_formats, @@ -526,6 +536,16 @@ public: int resolve_multivalue_index_constraint( const share::schema::ObColumnSchemaV2 &column_schema, const int64_t index_keyname_value); + int resolve_vec_index_constraint( + const share::schema::ObTableSchema &table_schema, + ObSchemaChecker &schema_checker, + const common::ObString &column_name, + const int64_t index_keyname_value, + ParseNode *node); + int resolve_vec_index_constraint( + const share::schema::ObColumnSchemaV2 &column_schema, + const int64_t index_keyname_value, + ParseNode *node); protected: static int 
get_part_str_with_type( const bool is_oracle_mode, @@ -720,6 +740,10 @@ protected: int resolve_enum_or_set_column( const ParseNode *type_node, share::schema::ObColumnSchemaV2 &column); + int resolve_collection_column( + const ParseNode *type_node, + share::schema::ObColumnSchemaV2 &column); + static int is_gen_col_with_udf(const ObTableSchema &table_schema, const ObRawExpr *col_expr, @@ -966,8 +990,8 @@ protected: int deep_copy_string_in_part_expr(ObPartitionedStmt* stmt); int deep_copy_column_expr_name(common::ObIAllocator &allocator, ObIArray &exprs); int check_ttl_definition(const ParseNode *node); - int add_new_indexkey_for_oracle_temp_table(); + int check_index_param(const ParseNode *option_node, ObString &index_params); void reset(); int get_mv_container_table(uint64_t tenant_id, @@ -1041,6 +1065,7 @@ protected: bool have_generate_fts_arg_; bool is_set_lob_inrow_threshold_; int64_t lob_inrow_threshold_; + bool have_generate_vec_arg_; int64_t auto_increment_cache_size_; ObExternalFileFormat::FormatType external_table_format_type_; common::ObBitSet<> mocked_external_table_column_ids_; diff --git a/src/sql/resolver/dml/ob_default_value_utils.cpp b/src/sql/resolver/dml/ob_default_value_utils.cpp index d1dc046afd..4b2ce325a9 100644 --- a/src/sql/resolver/dml/ob_default_value_utils.cpp +++ b/src/sql/resolver/dml/ob_default_value_utils.cpp @@ -121,7 +121,8 @@ int ObDefaultValueUtils::resolve_default_function_static( expr_factory, col_schema, fun_expr->get_param_expr(4), session_info))) { LOG_WARN("fail to build default value", K(ret)); - } else if (ob_is_enumset_tc(col_schema->get_data_type())) { + } else if (ob_is_enumset_tc(col_schema->get_data_type()) + || ob_is_collection_sql_type(col_schema->get_data_type())) { const ObIArray &enum_set_values = col_schema->get_extended_type_info(); if (OB_FAIL(fun_expr->set_enum_set_values(enum_set_values))) { LOG_WARN("failed to set_enum_set_values", K(ret)); @@ -211,7 +212,8 @@ int 
ObDefaultValueUtils::resolve_default_function(ObRawExpr *&expr, ObStmtScope } else if (OB_FAIL(build_default_function_expr( column_item, fun_expr->get_param_expr(4), scope, false))) { LOG_WARN("fail to build default value", K(ret)); - } else if (ob_is_enumset_tc(column_expr->get_data_type())) { + } else if (ob_is_enumset_tc(column_expr->get_data_type()) + || ob_is_collection_sql_type(column_expr->get_data_type())) { const ObIArray &enum_set_values = column_expr->get_enum_set_values(); if (OB_FAIL(fun_expr->set_enum_set_values(enum_set_values))) { LOG_WARN("failed to set_enum_set_values", K(ret)); @@ -281,7 +283,8 @@ int ObDefaultValueUtils::resolve_default_expr(const ColumnItem &column_item, ObR LOG_WARN("fail to add defualt value expr", K(ret)); } else { const ObColumnSchemaV2 *column_schema = NULL; - if (ob_is_enumset_tc(column_item.get_column_type()->get_type())) { + if (ob_is_enumset_tc(column_item.get_column_type()->get_type()) + || ob_is_collection_sql_type(column_item.get_column_type()->get_type())) { bool is_link = ObSqlSchemaGuard::is_link_table(stmt_, column_item.table_id_); if (OB_ISNULL(params_->schema_checker_)) { ret = OB_ERR_UNEXPECTED; @@ -338,7 +341,8 @@ int ObDefaultValueUtils::build_default_expr_strict_static( } else { c_expr->set_value(column_schema->get_cur_default_value()); } - if (OB_SUCC(ret) && ob_is_enumset_tc(c_expr->get_data_type())) { + if (OB_SUCC(ret) + && (ob_is_enumset_tc(c_expr->get_data_type()) || ob_is_collection_sql_type(c_expr->get_data_type()))) { if (OB_UNLIKELY(column_schema->get_extended_type_info().count() < 1)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid column schema", KPC(column_schema), K(ret)); @@ -414,7 +418,8 @@ int ObDefaultValueUtils::build_default_expr_strict(const ColumnItem *column, ObR } else { c_expr->set_value(column->default_value_); } - if (OB_SUCC(ret) && ob_is_enumset_tc(c_expr->get_data_type())) { + if (OB_SUCC(ret) + && (ob_is_enumset_tc(c_expr->get_data_type()) || 
ob_is_collection_sql_type(c_expr->get_data_type()))) { const ObColumnRefRawExpr *column_expr = column->get_expr(); if (OB_ISNULL(column_expr)) { ret = OB_ERR_UNEXPECTED; @@ -799,7 +804,8 @@ int ObDefaultValueUtils::build_default_expr_not_strict_static( } } - if (OB_SUCC(ret) && ob_is_enumset_tc(c_expr->get_data_type())) { + if (OB_SUCC(ret) + && (ob_is_enumset_tc(c_expr->get_data_type()) || ob_is_collection_sql_type(c_expr->get_data_type()))) { if (OB_UNLIKELY(column_schema->get_extended_type_info().count() < 1)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid column_expr", KPC(column_schema), K(ret)); @@ -853,7 +859,8 @@ int ObDefaultValueUtils::build_default_expr_not_strict(const ColumnItem *column, } } - if (OB_SUCC(ret) && ob_is_enumset_tc(c_expr->get_data_type())) { + if (OB_SUCC(ret) + && (ob_is_enumset_tc(c_expr->get_data_type()) || ob_is_collection_sql_type(c_expr->get_data_type()))) { const ObColumnRefRawExpr *column_expr = column->get_expr(); if (OB_ISNULL(column_expr)) { ret = OB_ERR_UNEXPECTED; diff --git a/src/sql/resolver/dml/ob_del_upd_resolver.cpp b/src/sql/resolver/dml/ob_del_upd_resolver.cpp index 746e69c517..6f22f27235 100644 --- a/src/sql/resolver/dml/ob_del_upd_resolver.cpp +++ b/src/sql/resolver/dml/ob_del_upd_resolver.cpp @@ -25,6 +25,7 @@ #include "pl/ob_pl_resolver.h" #include "sql/parser/parse_malloc.h" #include "sql/resolver/dml/ob_merge_resolver.h" +#include "share/vector_index/ob_vector_index_util.h" #include "share/external_table/ob_external_table_utils.h" namespace oceanbase @@ -356,6 +357,19 @@ int ObDelUpdResolver::resolve_column_and_values(const ParseNode &assign_list, } OZ (c_expr->add_flag(IS_TABLE_ASSIGN)); OX (c_expr->set_result_type(col_expr->get_result_type())); + if (col_expr->get_result_type().get_obj_meta().is_collection_sql_type() + && col_expr->get_enum_set_values().count() > 0) { + // array type + uint16_t subschema_id = 0; + if (OB_ISNULL(session_info_) || OB_ISNULL(session_info_->get_cur_exec_ctx())) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("session or exec ctx is null", K(ret), K(session_info_)); + } else if (OB_FAIL(session_info_->get_cur_exec_ctx()->get_subschema_id_by_type_string(col_expr->get_enum_set_values().at(0), subschema_id))) { + LOG_WARN("failed to get array type subschema id", K(ret)); + } else { + c_expr->set_subschema_id(subschema_id); + } + } } } } else if (OB_UNLIKELY(!value_expr->is_query_ref_expr())) { @@ -558,6 +572,41 @@ int ObDelUpdResolver::generate_wrapper_expr_for_assignemnts(ObIArray &assigns, + bool &update_with_vector_index) +{ + int ret = OB_SUCCESS; + update_with_vector_index = false; + ObArray part_key_col_ids; + // get part keys + if (!table_schema->is_partitioned_table()) { + // do nothing + } else if (table_schema->get_partition_key_info().get_size() > 0 && + OB_FAIL(table_schema->get_partition_key_info().get_column_ids(part_key_col_ids))) { + LOG_WARN("failed to get column ids", K(ret)); + } else if (table_schema->get_subpartition_key_info().get_size() > 0 && + OB_FAIL(table_schema->get_subpartition_key_info().get_column_ids(part_key_col_ids))) { + LOG_WARN("failed to get column ids", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < assigns.count() && !update_with_vector_index; ++i) { + const ObAssignment &as = assigns.at(i); + if (OB_FAIL(ObVectorIndexUtil::check_column_has_vector_index(*table_schema, + *schema_guard, + as.column_expr_->get_column_id(), + update_with_vector_index))) { + LOG_WARN("fail to check update with vector index", K(ret)); + } else if (!update_with_vector_index && !part_key_col_ids.empty()) { + // check if update with part key, update with part key should also update vid + if (has_exist_in_array(part_key_col_ids, as.column_expr_->get_column_id())) { + update_with_vector_index = true; + } + } + } + return ret; +} + int ObDelUpdResolver::resolve_additional_assignments(ObIArray &assigns, const ObStmtScope scope) { @@ -567,6 +616,7 @@ int ObDelUpdResolver::resolve_additional_assignments(ObIArray const TableItem 
*table_item = NULL; ObDMLStmt *stmt = get_stmt(); bool trigger_exist = false; + bool update_with_vector_index = false; if (OB_ISNULL(params_.expr_factory_) || OB_ISNULL(stmt)) { ret = OB_NOT_INIT; LOG_WARN("params is invalid", K_(params_.expr_factory), K(stmt)); @@ -593,6 +643,11 @@ int ObDelUpdResolver::resolve_additional_assignments(ObIArray LOG_WARN("fail to call has_before_update_row_trigger", K(*table_schema)); } else if (OB_FAIL(generate_wrapper_expr_for_assignemnts(assigns.at(i).assignments_, trigger_exist))) { LOG_WARN("failed to resolve addtional assignments for const", K(ret), K(i)); + } else if (OB_FAIL(check_update_vector_col_with_vector_index(table_schema, + schema_guard, + assigns.at(i).assignments_, + update_with_vector_index))) { + LOG_WARN("failed to check if update with vector index", K(ret), K(i)); } else { for (ObTableSchema::const_column_iterator iter = table_schema->column_begin(); (OB_SUCCESS == ret && iter != table_schema->column_end()); ++iter) { @@ -611,6 +666,9 @@ int ObDelUpdResolver::resolve_additional_assignments(ObIArray *column_schema, need_assigned))) { LOG_WARN("fail to check assignment exist", KPC(table_item), K(column_id)); + } else if (FALSE_IT(need_assigned = need_assigned || + (column_schema->is_vec_vid_column() && + (update_with_vector_index || T_INSERT_SCOPE == scope)))) { } else if (need_assigned) { // for insert scope, on duplicate key update column list already // exists in insert list, therefore, only need to add assignment. 
@@ -635,6 +693,18 @@ int ObDelUpdResolver::resolve_additional_assignments(ObIArray if (OB_FAIL(utils.build_now_expr(col_item, assignment.expr_))) { LOG_WARN("fail to build default expr", K(ret)); } + } else if (column_schema->is_vec_vid_column() && (stmt->is_update_stmt() || T_INSERT_SCOPE == scope)) { + // for vid col, should build generated expr here for update assign new val + ObString col_def; + if (OB_FAIL(column_schema->get_cur_default_value().get_string(col_def))) { + LOG_WARN("get generated column definition failed", K(ret), K(*column_schema)); + } else if (OB_FAIL(ObSQLUtils::convert_sql_text_from_schema_for_resolve(*allocator_, + session_info_->get_dtc_params(), col_def))) { + LOG_WARN("fail to convert for resolve", K(ret)); + } else if (OB_FAIL(resolve_generated_column_expr(col_def, table_item->get_base_table_item(), column_schema, + *assignment.column_expr_, assignment.expr_, true, stmt))) { + LOG_WARN("resolve generated column expr failed", K(ret)); + } } else if (column_schema->is_generated_column()) { if (OB_FAIL(copy_schema_expr(*params_.expr_factory_, col_item->expr_->get_dependant_expr(), @@ -2360,6 +2430,9 @@ int ObDelUpdResolver::view_pullup_special_column_exprs() } else if (basic_column_item->expr_->is_identity_column()) { view_column_item->expr_->set_column_flags( basic_column_item->expr_->get_column_flags()); + } else if (basic_column_item->expr_->is_vec_vid_column()) { + view_column_item->expr_->set_column_flags( + basic_column_item->expr_->get_column_flags()); } } } @@ -2913,6 +2986,7 @@ int ObDelUpdResolver::build_column_conv_function_with_default_expr(ObInsertTable } else { ObSchemaGetterGuard *schema_guard = NULL; const ObTableSchema* table_schema = NULL; + const ObColumnSchemaV2 *col_schema = nullptr; bool trigger_exist = false; ColumnItem *column_item = del_upd_stmt->get_column_item_by_id(table_id, tbl_col->get_column_id()); ObRawExpr *function_expr = NULL; @@ -2930,46 +3004,55 @@ int 
ObDelUpdResolver::build_column_conv_function_with_default_expr(ObInsertTable } else if (OB_ISNULL(table_schema)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("fail to get table schema", K(table_info), K(table_schema)); - } else if (OB_FAIL(table_schema->has_before_insert_row_trigger(*schema_guard, trigger_exist))) { - LOG_WARN("fail to call has_before_update_row_trigger", K(*table_schema)); } else if (OB_ISNULL(column_item)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null column item", K(ret), K(column_item)); - } else if (OB_FAIL(utils.generate_insert_value(column_item, expr, - del_upd_stmt->has_instead_of_trigger()))) { - LOG_WARN("failed to generate insert value", K(ret)); - } else if (OB_ISNULL(expr)) { + } else if (OB_ISNULL(col_schema = table_schema->get_column_schema(column_item->base_cid_))) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("expr should not be null", K(ret)); - } else if (ob_is_enum_or_set_type(expr->get_data_type())) { - function_expr = expr; + LOG_WARN("fail to get column schema", K(ret), KPC(tbl_col), KPC(column_item)); + } else if (col_schema->is_vec_vid_column()) { + if (OB_FAIL(build_vec_vid_function_expr(table_info, *col_schema, *tbl_col, function_expr))) { + LOG_WARN("fail to build doc id function expr", K(ret), K(table_info), KPC(tbl_col), KPC(col_schema)); + } } else { - // For char type, compare and hash ignore space - // For binary type, compare and hash not ignore '\0', so need to padding - // '\0' for optimizer calculating partition location. As storage do right - // trim of '\0', so don't worry extra space usage. 
- if (ObObjMeta::is_binary(tbl_col->get_data_type(), - tbl_col->get_collation_type())) { - if (OB_FAIL(build_padding_expr(session_info_, column_item, expr))) { - LOG_WARN("Build padding expr error", K(ret)); - } - } - // maybe 没有必要再加一层column - // conv函数,如果能够保证schema表中的默认值已经是合法值 - if (OB_FAIL(ret)) { - } else if (expr->get_expr_type() == T_TABLET_AUTOINC_NEXTVAL) { - // 如果是堆表的隐藏自增列,不需要构建conv表达式 + if (OB_FAIL(table_schema->has_before_insert_row_trigger(*schema_guard, trigger_exist))) { + LOG_WARN("fail to call has_before_update_row_trigger", K(*table_schema)); + } else if (OB_FAIL(utils.generate_insert_value(column_item, expr, + del_upd_stmt->has_instead_of_trigger()))) { + LOG_WARN("failed to generate insert value", K(ret)); + } else if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expr should not be null", K(ret)); + } else if (ob_is_enum_or_set_type(expr->get_data_type())) { function_expr = expr; - } else if (OB_FAIL(ObRawExprUtils::build_column_conv_expr(*params_.expr_factory_, - *params_.allocator_, - *column_item->get_expr(), - expr, session_info_))) { - LOG_WARN("fail to build column conv expr", K(ret)); - } else if (trigger_exist && - OB_FAIL(ObRawExprUtils::build_wrapper_inner_expr(*params_.expr_factory_, *session_info_, expr, expr))) { - LOG_WARN("failed to build wrapper inner expr", K(ret)); } else { - function_expr = expr; + // For char type, compare and hash ignore space + // For binary type, compare and hash not ignore '\0', so need to padding + // '\0' for optimizer calculating partition location. As storage do right + // trim of '\0', so don't worry extra space usage. 
+ if (ObObjMeta::is_binary(tbl_col->get_data_type(), + tbl_col->get_collation_type())) { + if (OB_FAIL(build_padding_expr(session_info_, column_item, expr))) { + LOG_WARN("Build padding expr error", K(ret)); + } + } + // maybe 没有必要再加一层column + // conv函数,如果能够保证schema表中的默认值已经是合法值 + if (OB_FAIL(ret)) { + } else if (expr->get_expr_type() == T_TABLET_AUTOINC_NEXTVAL) { + // 如果是堆表的隐藏自增列,不需要构建conv表达式 + function_expr = expr; + } else if (OB_FAIL(ObRawExprUtils::build_column_conv_expr(*params_.expr_factory_, + *params_.allocator_, + *column_item->get_expr(), + expr, session_info_))) { + LOG_WARN("fail to build column conv expr", K(ret)); + } else if (trigger_exist && + OB_FAIL(ObRawExprUtils::build_wrapper_inner_expr(*params_.expr_factory_, *session_info_, expr, expr))) { + LOG_WARN("failed to build wrapper inner expr", K(ret)); + } else { + function_expr = expr; + } } } if (OB_SUCC(ret)) { @@ -3984,7 +4067,7 @@ int ObDelUpdResolver::replace_gen_col_dependent_col(ObInsertTableInfo& table_inf if (OB_ISNULL(col_expr = table_info.column_exprs_.at(i))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); - } else if (!col_expr->is_generated_column()) { + } else if (!col_expr->is_generated_column() && !col_expr->is_vec_vid_column()) { // do nothing } else if (i >= table_info.column_conv_exprs_.count() || OB_ISNULL(table_info.column_conv_exprs_.at(i))) { @@ -4947,5 +5030,30 @@ int ObDelUpdResolver::mark_json_partial_update_flag(const ObColumnRefRawExpr *re return ret; } + +int ObDelUpdResolver::build_vec_vid_function_expr( + const ObInsertTableInfo& table_info, + const ObColumnSchemaV2 &col_schema, + const ObColumnRefRawExpr &column, + ObRawExpr *&func_expr) +{ + int ret = OB_SUCCESS; + ObString col_def; + TableItem *table_item = get_stmt()->get_table_item_by_id(table_info.table_id_); + if (OB_ISNULL(table_item)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, table item is nullptr", K(ret), K(table_info), K(table_info)); + } else if 
(OB_FAIL(col_schema.get_cur_default_value().get_string(col_def))) { + LOG_WARN("get generated column definition failed", K(ret), K(col_schema)); + } else if (OB_FAIL(ObSQLUtils::convert_sql_text_from_schema_for_resolve(*allocator_, session_info_->get_dtc_params(), + col_def))) { + LOG_WARN("fail to convert for resolve", K(ret)); + } else if (OB_FAIL(resolve_generated_column_expr(col_def, table_item->get_base_table_item(), &col_schema, column, + func_expr, true, get_del_upd_stmt()))) { + LOG_WARN("resolve generated column expr failed", K(ret)); + } + return ret; +} + } /* namespace sql */ } /* namespace oceanbase */ diff --git a/src/sql/resolver/dml/ob_del_upd_resolver.h b/src/sql/resolver/dml/ob_del_upd_resolver.h index 32c86305ac..d0dd1a4c1e 100644 --- a/src/sql/resolver/dml/ob_del_upd_resolver.h +++ b/src/sql/resolver/dml/ob_del_upd_resolver.h @@ -239,6 +239,10 @@ protected: int add_select_items(ObSelectStmt &select_stmt, const ObIArray& select_items); int add_select_list_for_set_stmt(ObSelectStmt &select_stmt); int add_all_lob_columns_to_stmt(const TableItem &table_item, ObIArray &column_exprs); + int check_update_vector_col_with_vector_index(const ObTableSchema *table_schema, + ObSchemaGetterGuard *schema_guard, + const common::ObIArray &assigns, + bool &update_with_vector_index); protected: int generate_insert_table_info(const TableItem &table_item, ObInsertTableInfo &table_info, @@ -284,6 +288,11 @@ protected: int mark_json_partial_update_flag(const ObColumnRefRawExpr *ref_expr, ObRawExpr *expr, int depth, bool &allow_json_partial_update); int add_select_item_func(ObSelectStmt &select_stmt, ColumnItem &col); int select_items_is_pk(const ObSelectStmt& select_stmt, bool &has_pk); + int build_vec_vid_function_expr( + const ObInsertTableInfo& table_info, + const ObColumnSchemaV2 &col_schema, + const ObColumnRefRawExpr &column, + ObRawExpr *&func_expr); int is_external_table_partition_column(const TableItem &table_item, uint64_t column_id, bool &is_part_column); 
diff --git a/src/sql/resolver/dml/ob_del_upd_stmt.cpp b/src/sql/resolver/dml/ob_del_upd_stmt.cpp index 5831cb6b6d..b3da802b5c 100644 --- a/src/sql/resolver/dml/ob_del_upd_stmt.cpp +++ b/src/sql/resolver/dml/ob_del_upd_stmt.cpp @@ -550,10 +550,11 @@ int ObDelUpdStmt::update_base_tid_cid() LOG_WARN("get unexpected null", K(col), K(ret)); } else { const bool is_rowkey_doc = col->get_table_name().suffix_match("rowkey_doc"); + const bool is_rowkey_vid = col->get_table_name().suffix_match("rowkey_vid_table"); col_item->base_tid_ = col->get_table_id(); col_item->base_cid_ = col->get_column_id(); if (OB_UNLIKELY(col_item->base_tid_ == OB_INVALID_ID) || - OB_UNLIKELY(j != 0 && col_item->base_tid_ != base_tid && !is_rowkey_doc)) { + OB_UNLIKELY(j != 0 && col_item->base_tid_ != base_tid && !is_rowkey_doc && !is_rowkey_vid)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("base table id is invalid", K(ret), K(col_item->base_tid_), K(base_tid)); } else if (j == 0) { diff --git a/src/sql/resolver/dml/ob_dml_resolver.cpp b/src/sql/resolver/dml/ob_dml_resolver.cpp index f418d21a4b..e234a3ad4e 100755 --- a/src/sql/resolver/dml/ob_dml_resolver.cpp +++ b/src/sql/resolver/dml/ob_dml_resolver.cpp @@ -1808,7 +1808,7 @@ int ObDMLResolver::resolve_sql_expr(const ParseNode &node, ObRawExpr *&expr, } if (OB_SUCC(ret) && match_exprs.count() > 0) { - if (OB_FAIL(resolve_match_against_exprs(expr, match_exprs, current_scope_))) { + if (OB_FAIL(resolve_match_against_exprs(expr, match_exprs, current_scope_))) { // resolve and add match expr LOG_WARN("failed to resolve match against expr", K(ret)); } } @@ -2692,6 +2692,27 @@ int ObDMLResolver::resolve_basic_column_ref(const ObQualifiedName &q_name, ObRaw return ret; } +int ObDMLResolver::generate_subschema_id(ObColumnRefRawExpr &col_expr) +{ + int ret = OB_SUCCESS; + uint16_t subschema_id = ObMaxSystemUDTSqlType; + if (!ob_is_collection_sql_type(col_expr.get_data_type())) { + // do nothing + } else if (OB_ISNULL(session_info_) || 
OB_ISNULL(session_info_->get_cur_exec_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("sesion or exec ctx is null", K(ret)); + } else if (col_expr.get_enum_set_values().count() != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected type name", K(ret), K(col_expr.get_enum_set_values().count())); + } else if (OB_FAIL(session_info_->get_cur_exec_ctx()->get_subschema_id_by_type_string(col_expr.get_enum_set_values().at(0), subschema_id))) { + LOG_WARN("failed to get array type subschema id", K(ret)); + } else { + col_expr.set_subschema_id(subschema_id); + } + + return ret; +} + int ObDMLResolver::resolve_basic_column_item(const TableItem &table_item, const ObString &column_name, bool include_hidden, @@ -2761,6 +2782,9 @@ int ObDMLResolver::resolve_basic_column_item(const TableItem &table_item, LOG_WARN("column schema is null"); } else if (OB_FAIL(ObRawExprUtils::build_column_expr(*params_.expr_factory_, *col_schema, col_expr))) { LOG_WARN("build column expr failed", K(ret)); + } else if (ob_is_collection_sql_type(col_expr->get_data_type()) + && OB_FAIL(generate_subschema_id(*col_expr))) { + LOG_WARN("generate subschema id for collection column expr failed", K(ret)); } else if (OB_FAIL(table_schema->is_unique_key_column(*schema_guard, col_schema->get_column_id(), is_uni))) { @@ -2804,7 +2828,15 @@ int ObDMLResolver::resolve_basic_column_item(const TableItem &table_item, if (OB_SUCC(ret)) { ObString col_def; ObRawExpr *ref_expr = NULL; - if (col_schema->is_generated_column()) { + bool vec_vid_need_column_ref_expr = false; + if (col_schema->is_vec_vid_column() + && OB_FAIL(check_vec_vid_need_column_ref_expr(*stmt, vec_vid_need_column_ref_expr))) { + LOG_WARN("fail to check vec vid need column ref expr", K(ret), KPC(col_schema)); + } else if (vec_vid_need_column_ref_expr) { + // select __vid from rowkey doc instead of generated by auto inc seq in tablet. 
+ col_expr->del_column_flag(VIRTUAL_GENERATED_COLUMN_FLAG); + LOG_DEBUG("vec index debug", KPC(stmt), KPC(col_expr), KPC(col_schema), K(common::lbt())); + } else if (col_schema->is_generated_column()) { column_item.set_default_value(ObObj()); // set null to generated default value if (OB_FAIL(col_schema->get_cur_default_value().get_string(col_def))) { LOG_WARN("get generated column definition failed", K(ret), K(*col_schema)); @@ -5532,7 +5564,10 @@ int ObDMLResolver::resolve_base_or_alias_table_item_normal(uint64_t tenant_id, cte_table_fisrt, is_hidden, tschema))) { - if (OB_TABLE_NOT_EXIST == ret && ((stmt->is_select_stmt() && select_index_enabled) || session_info_->get_ddl_info().is_ddl())) { + if (OB_TABLE_NOT_EXIST == ret && + ((stmt->is_select_stmt() && select_index_enabled) || + session_info_->get_ddl_info().is_ddl() || + session_info_->get_ddl_info().is_dummy_ddl_for_inner_visibility())) { if (OB_FAIL(schema_checker_->get_table_schema(tenant_id, database_id, tbl_name, @@ -5541,7 +5576,10 @@ int ObDMLResolver::resolve_base_or_alias_table_item_normal(uint64_t tenant_id, is_hidden, tschema, false/*is_built_in_index*/))) { - if (OB_TABLE_NOT_EXIST == ret && stmt->is_select_stmt() && select_index_enabled) { + if (OB_TABLE_NOT_EXIST == ret && + ((stmt->is_select_stmt() && select_index_enabled) || + session_info_->get_ddl_info().is_ddl() || + session_info_->get_ddl_info().is_dummy_ddl_for_inner_visibility())) { if (OB_FAIL(schema_checker_->get_table_schema(tenant_id, database_id, tbl_name, @@ -5933,7 +5971,7 @@ int ObDMLResolver::resolve_table_partition_expr(const TableItem &table_item, con if (OB_SUCC(ret)) { ObRawExpr *tmp_part_expr = part_expr; ObRawExpr *tmp_subpart_expr = subpart_expr; - if (session_info_->get_ddl_info().is_ddl() && ObItemType::T_INSERT == params_.resolver_scope_stmt_type_) { + if ((session_info_->get_ddl_info().is_ddl()) && ObItemType::T_INSERT == params_.resolver_scope_stmt_type_) { // Only insert into select stmt needs to replace the 
virtual column(aka, part key) with its' definition. const ObTableSchema *index_schema = NULL; const ObPartitionKeyInfo &partition_keys = table_schema.get_partition_key_info(); @@ -6390,7 +6428,7 @@ int ObDMLResolver::resolve_partition_expr( LOG_WARN("resolve columns for parent table partition expr failed", K(ret)); } } else if (OB_FAIL(resolve_columns_for_partition_expr(expr, columns, table_item, - table_schema.is_oracle_tmp_table() || table_schema.is_fts_index()))) { + table_schema.is_oracle_tmp_table() || table_schema.is_fts_index() || table_schema.is_vec_index()))) { LOG_WARN("resolve columns for partition expr failed", K(ret)); } } @@ -7153,6 +7191,55 @@ int ObDMLResolver::add_all_rowkey_columns_to_stmt(const TableItem &table_item, return ret; } +int ObDMLResolver::resolve_approx_clause(const ParseNode *approx_node) +{ + int ret = OB_SUCCESS; + ObDMLStmt *stmt = get_stmt(); + uint64_t data_version = 0; + if (OB_ISNULL(stmt) || OB_ISNULL(session_info_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect null pointer", KPC(stmt), KPC(session_info_), K(ret)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(session_info_->get_effective_tenant_id(), data_version))) { + LOG_WARN("fail to get data_version", K(session_info_->get_effective_tenant_id()), K(data_version), K(ret)); + } else if (data_version < DATA_VERSION_4_3_3_0) { + // do nothing + } else if (OB_ISNULL(stmt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect null pointer", KPC(stmt), K(ret)); + } else if (OB_NOT_NULL(approx_node) && stmt->get_order_item_size() == 1) { + int order_size = stmt->get_order_item_size(); + bool found = false; + ObRawExpr *tmp_expr = stmt->get_order_item(0).expr_; + bool has_const = false; + if (OB_NOT_NULL(tmp_expr) && tmp_expr->is_vector_sort_expr()) { + // only order by distance with approx, set it true + int size = tmp_expr->get_param_count(); + for (int i = 0; i < size && OB_SUCC(ret) && !has_const; ++i) { + ObRawExpr *param_expr = tmp_expr->get_param_expr(i); + if 
(OB_ISNULL(param_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpect null pointer", KPC(tmp_expr), K(i), K(ret)); + } else if (param_expr->is_const_expr()) { + has_const = true; + stmt->set_has_vec_approx(true); + } + } + if (OB_SUCC(ret) && !has_const) { + // if has vector expr, but no const param, not support use vector index + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "can't use vector index without const param."); + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "can't use vector index without vector_sort_expr."); + } + } else if (OB_NOT_NULL(approx_node)) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "not support multi order by item when use vector index"); + } + return ret; +} + int ObDMLResolver::resolve_limit_clause(const ParseNode *node, bool disable_offset/*= false*/) { int ret = OB_SUCCESS; @@ -8222,6 +8309,9 @@ int ObDMLResolver::resolve_generated_column_expr(const ObString &expr_str, } else if (OB_NOT_NULL(column_schema) && column_schema->is_doc_id_column() && OB_FAIL(fill_doc_id_expr_param(table_item.table_id_, table_item.ref_id_, table_schema, ref_expr))) { LOG_WARN("fail to fill doc id expr param", K(ret), K(table_item), KP(table_schema), KP(ref_expr)); + } else if (OB_NOT_NULL(column_schema) && column_schema->is_vec_vid_column() + && OB_FAIL(fill_vec_id_expr_param(table_item.table_id_, table_item.ref_id_, table_schema, ref_expr))) { + LOG_WARN("fail to fill vec vid expr param", K(ret), K(table_item), KP(table_schema), KP(ref_expr)); } bool is_default_udt_constructor = false; @@ -9329,12 +9419,17 @@ int ObDMLResolver::check_table_exist_or_not(uint64_t tenant_id, const bool is_hidden = session_info_->is_table_name_hidden(); if (OB_FAIL(session_info_->is_select_index_enabled(select_index_enabled))) { LOG_WARN("fail to get select_index_enabled", K(ret)); - } else if ((select_index_enabled && is_select_resolver()) || session_info_->get_ddl_info().is_ddl()) { + } else if 
((select_index_enabled && is_select_resolver()) || + session_info_->get_ddl_info().is_ddl() || + session_info_->get_ddl_info().is_dummy_ddl_for_inner_visibility()) { if (OB_FAIL(schema_checker_->check_table_or_index_exists( tenant_id, database_id, table_name, is_hidden, false/*is_built_in_index*/, is_exist))) { LOG_WARN("fail to check table or index exist", K(tenant_id), K(database_id), K(table_name), K(ret)); - } else if (select_index_enabled && is_select_resolver() && !is_exist) { + } else if (((select_index_enabled && is_select_resolver()) || + session_info_->get_ddl_info().is_ddl() || + session_info_->get_ddl_info().is_dummy_ddl_for_inner_visibility()) && + !is_exist) { if (OB_FAIL(schema_checker_->check_table_or_index_exists(tenant_id, database_id, table_name, is_hidden, true/*is_built_in_index*/, is_exist))) { LOG_WARN("fail to check table or hidden index exist", K(ret), K(tenant_id), K(database_id), K(table_name)); @@ -10738,7 +10833,8 @@ int ObDMLResolver::resolve_generated_table_column_item(const TableItem &table_it col_expr->set_synonym_name(table_item.synonym_name_); } //set enum_set_values - if (ob_is_enumset_tc(select_expr->get_data_type())) { + if (ob_is_enumset_tc(select_expr->get_data_type()) + || ob_is_collection_sql_type(select_expr->get_data_type())) { if (OB_FAIL(col_expr->set_enum_set_values(select_expr->get_enum_set_values()))) { LOG_WARN("failed to set_enum_set_values", K(ret)); } @@ -17754,6 +17850,59 @@ int ObDMLResolver::adjust_values_desc_position(ObInsertTableInfo& table_info, return ret; } +int ObDMLResolver::fill_vec_id_expr_param( + const uint64_t table_id, + const uint64_t index_tid, + const ObTableSchema *table_schema, + ObRawExpr *&vec_id_expr) +{ + int ret = OB_SUCCESS; + ObDMLStmt *stmt = get_stmt(); + uint64_t rowkey_vid_tid = index_tid; + if (OB_ISNULL(table_schema) || OB_ISNULL(vec_id_expr)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KP(table_schema), KP(vec_id_expr)); + } else if 
(OB_UNLIKELY(index_tid != table_schema->get_table_id())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid index table id", K(ret), K(index_tid), K(table_schema->get_table_id())); + } else if (OB_UNLIKELY(T_FUN_SYS_VEC_VID != vec_id_expr->get_expr_type())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("not doc id expr", K(ret), "expr type", vec_id_expr->get_expr_type()); + } else if (OB_ISNULL(session_info_) || OB_ISNULL(params_.expr_factory_) || OB_ISNULL(stmt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("session info is NULL", KP_(session_info), KP_(params_.expr_factory), KP(stmt)); + } else if (table_schema->is_user_table() && OB_FAIL(table_schema->get_rowkey_vid_tid(rowkey_vid_tid))) { + LOG_WARN("fail to get rowkey vid tid", K(ret), KPC(table_schema)); + } else { + CopySchemaExpr copier(*params_.expr_factory_); + ObSysFunRawExpr *expr = static_cast(vec_id_expr); + ObRawExpr *part_expr = stmt->get_part_expr(table_id, rowkey_vid_tid); + ObRawExpr *subpart_expr = stmt->get_subpart_expr(table_id, rowkey_vid_tid); + schema::ObPartitionLevel part_level = table_schema->get_part_level(); + ObRawExpr *calc_tablet_id_expr = nullptr; + ObRawExpr *copy_part_expr = nullptr; + ObRawExpr *copy_subpart_expr = nullptr; + if (OB_FAIL(copier.copy(part_expr, copy_part_expr))) { + LOG_WARN("fail to do part expr copy", K(ret)); + } else if (OB_FAIL(copier.copy(subpart_expr, copy_subpart_expr))) { + LOG_WARN("fail to do sub part expr copy", K(ret)); + } else if (OB_FAIL(ObRawExprUtils::build_calc_partition_tablet_id_expr(*params_.expr_factory_, *session_info_, rowkey_vid_tid, + part_level, copy_part_expr, copy_subpart_expr, calc_tablet_id_expr))) { + LOG_WARN("fail to build calculate tablet id expr", K(ret), K(rowkey_vid_tid), KPC(table_schema)); + } else if (OB_ISNULL(calc_tablet_id_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null expr", K(ret), KP(calc_tablet_id_expr)); + } else if (OB_FAIL(expr->add_param_expr(calc_tablet_id_expr))) { + LOG_WARN("fail to replace param 
expr", K(ret), KP(calc_tablet_id_expr)); + } else if (OB_FAIL(expr->formalize(session_info_))) { + LOG_WARN("fail to formalize", K(ret), KP(session_info_)); + } + } + LOG_DEBUG("The dml resolver fills vec id expr parameter", K(ret), K(table_id), K(index_tid), K(rowkey_vid_tid), + KPC(vec_id_expr), KPC(table_schema)); + return ret; +} + int ObDMLResolver::fill_doc_id_expr_param( const uint64_t table_id, const uint64_t index_tid, @@ -18427,5 +18576,38 @@ int ObDMLResolver::add_udt_dependency(const pl::ObUserDefinedType &udt_type) return ret; } +int ObDMLResolver::check_vec_vid_need_column_ref_expr(ObDMLStmt &stmt, bool &need_column_ref_expr) +{ + int ret = OB_SUCCESS; + need_column_ref_expr = false; + if (stmt.is_delete_stmt() || stmt.is_update_stmt()) { + need_column_ref_expr = true; + } else if (stmt.is_select_stmt() && OB_ISNULL(upper_insert_resolver_)) { + need_column_ref_expr = true; + } else if (stmt.is_select_stmt() && OB_NOT_NULL(upper_insert_resolver_)) { + need_column_ref_expr = true; + if (OB_ISNULL(session_info_) || OB_ISNULL(schema_checker_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("session info or schema checker is nullptr", K(ret), KP(session_info_), KP(schema_checker_)); + } else if (session_info_->get_ddl_info().is_ddl()) { + ObDMLStmt *insert_stmt = upper_insert_resolver_->get_stmt(); + const share::schema::ObTableSchema *ddl_table_schema = nullptr; + if (OB_ISNULL(insert_stmt) || OB_UNLIKELY(insert_stmt->get_table_items().count() <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, insert stmt is nullptr or hasn't table item", K(ret), KPC(insert_stmt)); + } else if (OB_FAIL(schema_checker_->get_table_schema(session_info_->get_effective_tenant_id(), + insert_stmt->get_table_item(0)->ddl_table_id_, ddl_table_schema))) { + LOG_WARN("fail to get ddl table schema", K(ret), K(insert_stmt->get_table_item(0)->ddl_table_id_)); + } else if (OB_ISNULL(ddl_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ddl table schema is nullptr", 
K(ret), K(insert_stmt->get_table_item(0)->ddl_table_id_)); + } else if (ddl_table_schema->is_vec_rowkey_vid_type()) { + need_column_ref_expr = false; + } + } + } + return ret; +} + } // namespace sql } // namespace oceanbase diff --git a/src/sql/resolver/dml/ob_dml_resolver.h b/src/sql/resolver/dml/ob_dml_resolver.h index bca20fd746..144aa526f5 100644 --- a/src/sql/resolver/dml/ob_dml_resolver.h +++ b/src/sql/resolver/dml/ob_dml_resolver.h @@ -383,6 +383,7 @@ protected: ObDMLStmt *stmt = NULL); int adjust_values_desc_position(ObInsertTableInfo& table_info, ObIArray &value_idxs); + int generate_subschema_id(ObColumnRefRawExpr &col_expr); public: virtual int resolve_table(const ParseNode &parse_tree, TableItem *&table_item); protected: @@ -436,6 +437,7 @@ protected: int resolve_where_clause(const ParseNode *node); int resolve_order_clause(const ParseNode *node, bool is_for_set_query = false); int resolve_limit_clause(const ParseNode *node, bool disable_offset = false); + int resolve_approx_clause(const ParseNode *approx_node); int resolve_into_clause(const ParseNode *node); int resolve_hints(const ParseNode *node); int resolve_outline_data_hints(); @@ -476,6 +478,11 @@ protected: const uint64_t index_tid, const ObTableSchema *table_schema, ObRawExpr *&doc_id_expr); + int fill_vec_id_expr_param( + const uint64_t table_id, + const uint64_t index_tid, + const ObTableSchema *table_schema, + ObRawExpr *&vec_id_expr); int build_partid_expr(ObRawExpr *&expr, const uint64_t table_id); virtual int resolve_subquery_info(const common::ObIArray &subquery_info); virtual int resolve_inlist_info(common::ObIArray &inlist_infos); @@ -1058,6 +1065,9 @@ protected: const TableItem &table_item, TableItem *rowkey_doc_table, common::ObIArray &column_exprs); + int check_vec_vid_need_column_ref_expr( + ObDMLStmt &stmt, + bool &need_column_ref_expr); protected: ObStmtScope current_scope_; int32_t current_level_; diff --git a/src/sql/resolver/dml/ob_dml_stmt.cpp 
b/src/sql/resolver/dml/ob_dml_stmt.cpp index 9eef0e01b9..99d9dcd954 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.cpp +++ b/src/sql/resolver/dml/ob_dml_stmt.cpp @@ -429,6 +429,7 @@ ObDMLStmt::ObDMLStmt(stmt::StmtType type) check_constraint_items_(), dblink_id_(OB_INVALID_ID), is_reverse_link_(false), + has_vec_approx_(false), match_exprs_() { } @@ -541,6 +542,7 @@ int ObDMLStmt::assign(const ObDMLStmt &other) transpose_item_ = other.transpose_item_; dblink_id_ = other.dblink_id_; is_reverse_link_ = other.is_reverse_link_; + has_vec_approx_ = other.has_vec_approx_; } return ret; } @@ -706,6 +708,7 @@ int ObDMLStmt::deep_copy_stmt_struct(ObIAllocator &allocator, is_fetch_with_ties_ = other.is_fetch_with_ties_; dblink_id_ = other.dblink_id_; is_reverse_link_ = other.is_reverse_link_; + has_vec_approx_ = other.has_vec_approx_; } if (OB_SUCC(ret)) { TransposeItem *tmp = NULL; @@ -1835,10 +1838,15 @@ int ObDMLStmt::formalize_relation_exprs(ObSQLSessionInfo *session_info) ret = OB_ERR_UNEXPECTED; LOG_WARN("expr is NULL", K(ret)); } else if (column_expr->is_virtual_generated_column() && - (!column_expr->is_fulltext_column() && !column_expr->is_multivalue_generated_column())) { + (!column_expr->is_fulltext_column() && + !column_expr->is_multivalue_generated_column() && + !column_expr->is_vec_index_column())) { ObRawExpr *dependant_expr = static_cast( column_expr)->get_dependant_expr(); - if (OB_FAIL(dependant_expr->formalize(session_info))) { + if (dependant_expr == nullptr) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null", K(ret)); + } else if (OB_FAIL(dependant_expr->formalize(session_info))) { LOG_WARN("failed to formalize expr", K(ret)); } else if (OB_FAIL(dependant_expr->pull_relation_id())) { LOG_WARN("pull expr relation ids failed", K(ret), K(*dependant_expr)); @@ -4641,6 +4649,20 @@ int ObDMLStmt::has_virtual_generated_column(int64_t table_id, return ret; } +ObRawExpr* ObDMLStmt::get_first_vector_expr() const +{ + ObRawExpr* ret_expr = nullptr; + int 
order_size = get_order_item_size(); + bool found = false; + for (int i = 0; i < order_size && OB_ISNULL(ret_expr); ++i) { + ObRawExpr *tmp_expr = get_order_item(i).expr_; + if (OB_NOT_NULL(tmp_expr) && tmp_expr->is_vector_sort_expr()) { + ret_expr = tmp_expr; + } + } + return ret_expr; +} + int ObDMLStmt::check_hint_table_matched_table_item(ObCollationType cs_type, const ObTableInHint &hint_table, bool &matched) const diff --git a/src/sql/resolver/dml/ob_dml_stmt.h b/src/sql/resolver/dml/ob_dml_stmt.h index 2757506d04..89267892a3 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.h +++ b/src/sql/resolver/dml/ob_dml_stmt.h @@ -1075,6 +1075,8 @@ public: inline bool is_dblink_stmt() const { return OB_INVALID_ID != dblink_id_; } inline void set_reverse_link() { is_reverse_link_ = true; } inline bool is_reverse_link() const { return is_reverse_link_; } + inline void set_has_vec_approx(bool has_vec_approx) { has_vec_approx_ = has_vec_approx; } + inline bool has_vec_approx() const { return has_vec_approx_; } int add_subquery_ref(ObQueryRefRawExpr *query_ref); virtual int get_child_stmt_size(int64_t &child_size) const; int64_t get_subquery_expr_size() const { return subquery_exprs_.count(); } @@ -1146,7 +1148,8 @@ public: N_SUBQUERY_EXPRS, subquery_exprs_, N_USER_VARS, user_var_exprs_, K_(dblink_id), - K_(is_reverse_link)); + K_(is_reverse_link), + K_(has_vec_approx)); int check_if_contain_inner_table(bool &is_contain_inner_table) const; int check_if_contain_select_for_update(bool &is_contain_select_for_update) const; @@ -1201,6 +1204,7 @@ public: int has_virtual_generated_column(int64_t table_id, bool &has_virtual_col, bool ignore_fulltext_gen_col = false) const; + ObRawExpr *get_first_vector_expr() const; struct TempTableInfo { TempTableInfo() @@ -1335,6 +1339,7 @@ protected: */ int64_t dblink_id_; bool is_reverse_link_; + bool has_vec_approx_; // fulltext search exprs common::ObSEArray match_exprs_; }; diff --git a/src/sql/resolver/dml/ob_insert_resolver.cpp 
b/src/sql/resolver/dml/ob_insert_resolver.cpp index 573d8a8d1b..78836f92b4 100644 --- a/src/sql/resolver/dml/ob_insert_resolver.cpp +++ b/src/sql/resolver/dml/ob_insert_resolver.cpp @@ -678,9 +678,12 @@ int ObInsertResolver::resolve_values(const ParseNode &value_node, ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid select stmt", K(select_stmt)); } else if (!session_info_->get_ddl_info().is_ddl() && + !session_info_->get_ddl_info().is_dummy_ddl_for_inner_visibility() && OB_FAIL(check_insert_select_field(*insert_stmt, *select_stmt, is_mock_))) { LOG_WARN("check insert select field failed", K(ret), KPC(insert_stmt), KPC(select_stmt)); - } else if (!session_info_->get_ddl_info().is_ddl() && OB_FAIL(add_new_sel_item_for_oracle_temp_table(*select_stmt))) { + } else if (!session_info_->get_ddl_info().is_ddl() && + !session_info_->get_ddl_info().is_dummy_ddl_for_inner_visibility() && + OB_FAIL(add_new_sel_item_for_oracle_temp_table(*select_stmt))) { LOG_WARN("add session id value to select item failed", K(ret)); } else if (OB_FAIL(add_new_sel_item_for_oracle_label_security_table(insert_stmt->get_insert_table_info(), label_se_columns, @@ -1104,7 +1107,7 @@ int ObInsertResolver::mock_values_column_ref(const ObColumnRefRawExpr *column_re value_desc->set_ref_id(stmt->get_insert_table_info().table_id_, column_ref->get_column_id()); value_desc->set_column_attr(ObString::make_string(OB_VALUES), column_ref->get_column_name()); value_desc->set_udt_set_id(column_ref->get_udt_set_id()); - if (ob_is_enumset_tc(column_ref->get_result_type().get_type ()) + if ((ob_is_enumset_tc(column_ref->get_result_type().get_type()) || ob_is_collection_sql_type(column_ref->get_result_type().get_type())) && OB_FAIL(value_desc->set_enum_set_values(column_ref->get_enum_set_values()))) { LOG_WARN("failed to set_enum_set_values", K(*column_ref), K(ret)); } @@ -1301,7 +1304,9 @@ int ObInsertResolver::resolve_insert_constraint() if (OB_ISNULL(insert_stmt = get_insert_stmt()) || OB_ISNULL(session_info_)) { ret = 
OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(insert_stmt), K(session_info_), K(ret)); - } else if (session_info_->get_ddl_info().is_ddl() || insert_stmt->has_instead_of_trigger()) { + } else if (session_info_->get_ddl_info().is_ddl() || + session_info_->get_ddl_info().is_dummy_ddl_for_inner_visibility() || + insert_stmt->has_instead_of_trigger()) { /*do nothing*/ } else if (OB_ISNULL(table_item = insert_stmt->get_table_item_by_id( insert_stmt->get_insert_table_info().table_id_))) { diff --git a/src/sql/resolver/dml/ob_multi_table_insert_resolver.cpp b/src/sql/resolver/dml/ob_multi_table_insert_resolver.cpp index 84a814ad8d..8cbdf84d35 100644 --- a/src/sql/resolver/dml/ob_multi_table_insert_resolver.cpp +++ b/src/sql/resolver/dml/ob_multi_table_insert_resolver.cpp @@ -602,7 +602,8 @@ int ObMultiTableInsertResolver::mock_values_column_ref(const ObColumnRefRawExpr value_desc->set_ref_id(table_info.table_id_, column_ref->get_column_id()); value_desc->set_column_attr(ObString::make_string(OB_VALUES), column_ref->get_column_name()); value_desc->set_udt_set_id(column_ref->get_udt_set_id()); - if (ob_is_enumset_tc(column_ref->get_result_type().get_type ()) + if ((ob_is_enumset_tc(column_ref->get_result_type().get_type()) + || ob_is_collection_sql_type(column_ref->get_result_type().get_type())) && OB_FAIL(value_desc->set_enum_set_values(column_ref->get_enum_set_values()))) { LOG_WARN("failed to set_enum_set_values", K(*column_ref), K(ret)); } diff --git a/src/sql/resolver/dml/ob_select_resolver.cpp b/src/sql/resolver/dml/ob_select_resolver.cpp index 604b93d1f7..0bc099c633 100644 --- a/src/sql/resolver/dml/ob_select_resolver.cpp +++ b/src/sql/resolver/dml/ob_select_resolver.cpp @@ -1377,6 +1377,7 @@ int ObSelectResolver::resolve_normal_query(const ParseNode &parse_tree) } } OZ( resolve_order_clause(parse_tree.children_[PARSE_SELECT_ORDER]) ); + OZ( resolve_approx_clause(parse_tree.children_[PARSE_SELECT_APPROX])); OZ( 
resolve_limit_clause(parse_tree.children_[PARSE_SELECT_LIMIT]) ); OZ( resolve_fetch_clause(parse_tree.children_[PARSE_SELECT_FETCH]) ); OZ( resolve_check_option_clause(parse_tree.children_[PARSE_SELECT_WITH_CHECK_OPTION]) ); diff --git a/src/sql/resolver/dml/ob_sql_hint.cpp b/src/sql/resolver/dml/ob_sql_hint.cpp index c6c94b7521..eea39c4591 100644 --- a/src/sql/resolver/dml/ob_sql_hint.cpp +++ b/src/sql/resolver/dml/ob_sql_hint.cpp @@ -2438,8 +2438,8 @@ int LogTableHint::init_index_hints(ObSqlSchemaGuard &schema_guard) OB_ISNULL(index_schema)) { ret = OB_SCHEMA_ERROR; LOG_WARN("fail to get table schema", K(index_id), K(ret)); - } else if (index_schema->is_fts_index()) { - // just ignore domain index + } else if (index_schema->is_fts_index() || index_schema->is_vec_index()) { + // just ignore fts && vector index } else if (OB_FAIL(index_schema->get_index_name(index_name))) { LOG_WARN("fail to get index name", K(index_name), K(ret)); } diff --git a/src/sql/resolver/expr/ob_expr_info_flag.h b/src/sql/resolver/expr/ob_expr_info_flag.h index b1ef79a98b..780c252fa6 100644 --- a/src/sql/resolver/expr/ob_expr_info_flag.h +++ b/src/sql/resolver/expr/ob_expr_info_flag.h @@ -131,6 +131,7 @@ enum ObExprInfoFlag IS_ROWID_SIMPLE_COND, // rowid = const IS_ROWID_RANGE_COND, // rowid belongs to a range IS_TABLE_ASSIGN, // update t1 set c1 = const + IS_ATTR_EXPR, // collection attr expr IS_EXISTS, }; diff --git a/src/sql/resolver/expr/ob_raw_expr.cpp b/src/sql/resolver/expr/ob_raw_expr.cpp index 3bf163d305..68c71b6b4d 100644 --- a/src/sql/resolver/expr/ob_raw_expr.cpp +++ b/src/sql/resolver/expr/ob_raw_expr.cpp @@ -289,6 +289,8 @@ int ObRawExpr::assign(const ObRawExpr &other) LOG_WARN("failed to assign enum set values", K(ret)); } else if (OB_FAIL(local_session_var_.assign(other.local_session_var_))) { LOG_WARN("fail to assign local session vars", K(ret)); + } else if (OB_FAIL(attr_exprs_.assign(other.attr_exprs_))) { + LOG_WARN("failed to assign exprs", K(ret)); } } } @@ -921,6 
+923,7 @@ int ObRawExpr::is_const_inherit_expr(bool &is_const_inherit, || T_FUN_SYS_SEQ_NEXTVAL == type_ || T_FUN_SYS_AUTOINC_NEXTVAL == type_ || T_FUN_SYS_DOC_ID == type_ + || T_FUN_SYS_VEC_VID == type_ || T_FUN_SYS_TABLET_AUTOINC_NEXTVAL == type_ || T_FUN_SYS_ROWNUM == type_ || T_FUN_SYS_ROWKEY_TO_ROWID == type_ @@ -1149,6 +1152,24 @@ int ObRawExpr::has_exec_param(bool &bool_ret) const return ret; } +const ObRawExpr *ObRawExpr::get_attr_expr(int64_t index) const +{ + const ObRawExpr *expr = NULL; + if (index >= 0 && index < attr_exprs_.count()) { + expr = attr_exprs_.at(index); + } + return expr; +} + +ObRawExpr *ObRawExpr::get_attr_expr(int64_t index) +{ + if (index >= 0 && index < attr_exprs_.count()) { + return attr_exprs_.at(index); + } else { + return USELESS_POINTER; + } +} + //////////////////////////////////////////////////////////////// int ObConstRawExpr::assign(const ObRawExpr &other) { diff --git a/src/sql/resolver/expr/ob_raw_expr.h b/src/sql/resolver/expr/ob_raw_expr.h index d04cfd54c4..eacabff41c 100644 --- a/src/sql/resolver/expr/ob_raw_expr.h +++ b/src/sql/resolver/expr/ob_raw_expr.h @@ -1759,7 +1759,8 @@ public: is_deterministic_(true), partition_id_calc_type_(CALC_INVALID), local_session_var_(), - local_session_var_id_(OB_INVALID_INDEX_INT64) + local_session_var_id_(OB_INVALID_INDEX_INT64), + attr_exprs_() { } @@ -1783,7 +1784,8 @@ public: runtime_filter_type_(NOT_INIT_RUNTIME_FILTER_TYPE), with_null_equal_cond_(false), local_session_var_(&alloc), - local_session_var_id_(OB_INVALID_INDEX_INT64) + local_session_var_id_(OB_INVALID_INDEX_INT64), + attr_exprs_() { } virtual ~ObRawExpr(); @@ -1992,6 +1994,7 @@ public: partition_id_calc_type_ = calc_type; } bool is_json_expr() const; bool is_multiset_expr() const; + bool is_vector_sort_expr() const { return get_expr_type() == T_FUN_SYS_L2_DISTANCE; } PartitionIdCalcType get_partition_id_calc_type() const { return partition_id_calc_type_; } void set_may_add_interval_part(MayAddIntervalPart flag) { 
may_add_interval_part_ = flag; @@ -2037,6 +2040,12 @@ public: int extract_local_session_vars_recursively(ObIArray &var_array); void set_local_session_var_id(int64_t idx) { local_session_var_id_ = idx; } int64_t get_local_session_var_id() { return local_session_var_id_; } + int64_t get_attr_count() const { return attr_exprs_.count(); } + const ObRawExpr *get_attr_expr(int64_t index) const; + ObRawExpr *get_attr_expr(int64_t index); + common::ObIArray &get_attr_exprs() { return attr_exprs_; } + const common::ObIArray &get_attr_exprs() const { return attr_exprs_; } + int add_attr_expr(ObRawExpr *expr) { return attr_exprs_.push_back(expr); } int get_expr_dep_session_vars_recursively(const ObBasicSessionInfo *session, ObLocalSessionVar &dep_vars); @@ -2086,6 +2095,7 @@ protected: bool with_null_equal_cond_; ObLocalSessionVar local_session_var_; int64_t local_session_var_id_; + common::ObSEArray attr_exprs_; private: DISALLOW_COPY_AND_ASSIGN(ObRawExpr); }; @@ -2746,6 +2756,11 @@ public: inline bool is_default_on_null_identity_column() const { return share::schema::ObSchemaUtils::is_default_on_null_identity_column(column_flags_); } inline bool is_fulltext_column() const { return share::schema::ObSchemaUtils::is_fulltext_column(column_flags_); } inline bool is_doc_id_column() const { return share::schema::ObSchemaUtils::is_doc_id_column(column_flags_); } + inline bool is_vec_vid_column() const { return share::schema::ObSchemaUtils::is_vec_vid_column(column_flags_); } + inline bool is_vec_vector_column() const { return share::schema::ObSchemaUtils::is_vec_vector_column(column_flags_); } + inline bool is_vec_type_column() const { return share::schema::ObSchemaUtils::is_vec_type_column(column_flags_); } + inline bool is_vec_scn_column() const { return share::schema::ObSchemaUtils::is_vec_scn_column(column_flags_); } + inline bool is_vec_index_column() const {return share::schema::ObSchemaUtils::is_vec_index_column(column_flags_);} inline bool is_word_segment_column() const { 
return column_name_.prefix_match(OB_WORD_SEGMENT_COLUMN_NAME_PREFIX); } inline bool is_word_count_column() const { return column_name_.prefix_match(OB_WORD_COUNT_COLUMN_NAME_PREFIX); } inline bool is_spatial_generated_column() const { return share::schema::ObSchemaUtils::is_spatial_generated_column(column_flags_); } @@ -2756,6 +2771,7 @@ public: inline bool is_table_part_key_column() const { return column_flags_ & TABLE_PART_KEY_COLUMN_FLAG; } inline bool is_table_part_key_org_column() const { return column_flags_ & TABLE_PART_KEY_COLUMN_ORG_FLAG; } inline bool has_table_alias_name() const { return column_flags_ & TABLE_ALIAS_NAME_FLAG; } + void del_column_flag(uint64_t flag) { column_flags_ &= ~flag; } void set_column_flags(uint64_t column_flags) { column_flags_ = column_flags; } void set_table_alias_name() { column_flags_ |= TABLE_ALIAS_NAME_FLAG; } void set_table_part_key_column() { column_flags_ |= TABLE_PART_KEY_COLUMN_FLAG; } diff --git a/src/sql/resolver/expr/ob_raw_expr_deduce_type.cpp b/src/sql/resolver/expr/ob_raw_expr_deduce_type.cpp index 36e517a65e..d0fd52c61e 100644 --- a/src/sql/resolver/expr/ob_raw_expr_deduce_type.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_deduce_type.cpp @@ -175,11 +175,28 @@ int ObRawExprDeduceType::visit(ObColumnRefRawExpr &expr) } else if (OB_ISNULL(exec_ctx)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("need context to search subschema mapping", K(ret), K(udt_id)); - } else if (FALSE_IT(subschema_id = ObMaxSystemUDTSqlType)) { - } else if (OB_FAIL(exec_ctx->get_subschema_id_by_udt_id(udt_id, subschema_id))) { - LOG_WARN("failed to get subschema id by udt id", K(ret), K(udt_id)); + } else if (ObObjUDTUtil::ob_is_supported_sql_udt(udt_id)) { + subschema_id = ObMaxSystemUDTSqlType; + if (OB_FAIL(exec_ctx->get_subschema_id_by_udt_id(udt_id, subschema_id))) { + LOG_WARN("failed to get subschema id by udt id", K(ret), K(udt_id)); + } else { + expr.set_subschema_id(subschema_id); + } + } else if (expr.get_enum_set_values().count() > 0) { + 
// array type + if (OB_FAIL(exec_ctx->get_subschema_id_by_type_string(expr.get_enum_set_values().at(0), subschema_id))) { + LOG_WARN("failed to get array type subschema id", K(ret)); + } else if (FALSE_IT(expr.set_subschema_id(subschema_id))) { + } else if (OB_FAIL(construct_collecton_attr_expr(expr))) { + LOG_WARN("failed to construct collection attr expr", K(ret)); + } } else { - expr.set_subschema_id(subschema_id); + // stmt : insert into arr_t1 select array(), array() is mock colunmn_expr which isn't with enum_set_values + // just check subschema_id validity + ObSubSchemaValue meta_unused; + if (OB_FAIL(exec_ctx->get_sqludt_meta_by_subschema_id(subschema_id, meta_unused))) { + LOG_WARN("invalid subschema id", K(ret), K(subschema_id)); + } } } return ret; @@ -721,6 +738,25 @@ int ObRawExprDeduceType::calc_result_type(ObNonTerminalRawExpr &expr, if (expr.get_result_type().has_result_flag(ZEROFILL_FLAG)) { cast_mode |= CM_ZERO_FILL; } + if (ob_is_collection_sql_type(expr.get_result_type().get_type()) + && !ObObjUDTUtil::ob_is_supported_sql_udt(expr.get_result_type().get_udt_id())) { + if (expr.get_expr_class() == ObRawExpr::EXPR_OPERATOR + || expr.get_expr_class() == ObRawExpr::EXPR_SYS_FUNC + || expr.get_expr_class() == ObRawExpr::EXPR_SET_OP) { + ObOpRawExpr *op_expr = static_cast(&expr); + if (OB_FAIL(construct_collecton_attr_expr(*op_expr))) { + LOG_WARN("failed to construct collection attr expr", K(ret)); + } + } else if (expr.get_expr_class() == ObRawExpr::EXPR_CASE_OPERATOR) { + ObCaseOpRawExpr *op_expr = static_cast(&expr); + if (OB_FAIL(construct_collecton_attr_expr(*op_expr))) { + LOG_WARN("failed to construct collection attr expr", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected expr class type", K(ret), K(expr)); + } + } } LOG_DEBUG("calc_result_type", K(ret), K(expr), K(types), K(cast_mode)); } @@ -728,6 +764,124 @@ int ObRawExprDeduceType::calc_result_type(ObNonTerminalRawExpr &expr, return ret; } +template +int 
ObRawExprDeduceType::add_attr_exprs(const ObCollectionTypeBase *coll_meta, RawExprType &expr) +{ + int ret = OB_SUCCESS; + ObItemType expr_type = T_REF_COLUMN; + if (coll_meta->type_id_ == ObNestedType::OB_ARRAY_TYPE) { + ObColumnRefRawExpr *attr_expr = NULL; + const ObCollectionArrayType *arr_meta = static_cast(coll_meta); + if (OB_FAIL(ObRawExprUtils::create_attr_expr(expr_factory_, my_session_, + expr_type, ArrayAttr::ATTR_NULL_BITMAP, + attr_expr))) { + LOG_WARN("failed to create nullbitmap attr expr", K(ret)); + } else if (OB_FAIL(expr.add_attr_expr(attr_expr))) { + LOG_WARN("failed to add attr expr", K(ret)); + } else if (OB_FAIL(ObRawExprUtils::create_attr_expr(expr_factory_, my_session_, + expr_type, ArrayAttr::ATTR_OFFSETS, + attr_expr))) { + LOG_WARN("failed to create offset attr expr", K(ret)); + } else if (OB_FAIL(expr.add_attr_expr(attr_expr))) { + LOG_WARN("failed to add attr expr", K(ret)); + } else if (OB_FAIL(add_attr_exprs(arr_meta->element_type_, expr))) { + LOG_WARN("failed to add attr expr", K(ret)); + } + } else if (coll_meta->type_id_ == ObNestedType::OB_BASIC_TYPE) { + ObColumnRefRawExpr *attr_expr = NULL; + const ObCollectionBasicType *elem_type = static_cast(coll_meta); + if (OB_FAIL(ObRawExprUtils::create_attr_expr(expr_factory_, my_session_, + expr_type, ArrayAttr::ATTR_NULL_BITMAP, + attr_expr))) { + LOG_WARN("failed to create nullbitmap attr expr", K(ret)); + } else if (OB_FAIL(expr.add_attr_expr(attr_expr))) { + LOG_WARN("failed to add attr expr", K(ret)); + } else if (!is_fixed_length(elem_type->basic_meta_.get_obj_type())) { + if (OB_FAIL(ObRawExprUtils::create_attr_expr(expr_factory_, my_session_, + expr_type, ArrayAttr::ATTR_OFFSETS, + attr_expr))) { + LOG_WARN("failed to create nullbitmap attr expr", K(ret)); + } else if (OB_FAIL(expr.add_attr_expr(attr_expr))) { + LOG_WARN("failed to add attr expr", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObRawExprUtils::create_attr_expr(expr_factory_, my_session_, + 
expr_type, ArrayAttr::ATTR_DATA, + attr_expr))) { + LOG_WARN("failed to create nullbitmap attr expr", K(ret)); + } else if (OB_FAIL(expr.add_attr_expr(attr_expr))) { + LOG_WARN("failed to add attr expr", K(ret)); + } + } + return ret; +} + +template +int ObRawExprDeduceType::construct_collecton_attr_expr(RawExprType &expr) +{ + int ret = OB_SUCCESS; + uint16_t subschema_id = expr.get_result_type().get_subschema_id(); + ObExecContext *exec_ctx = const_cast(my_session_->get_cur_exec_ctx()); + ObSubSchemaValue value; + const ObSqlCollectionInfo *coll_info = NULL; + ObItemType expr_type = expr.get_expr_type(); + bool need_set_values = expr.get_enum_set_values().empty(); + bool need_construct_attrs = true; + if (expr.get_attr_count() > 0) { + // attrs constructed already, do nothing + need_construct_attrs = false; + } else if (expr.is_const_expr()) { + // is uniform format, do nothing + need_construct_attrs = false; + } + if (!need_set_values && !need_construct_attrs) { + // do nothing + } else if (OB_ISNULL(exec_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("exec ctx is null", K(ret)); + } else if (OB_ISNULL(expr_factory_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null raw expr", K(ret)); + } else if (OB_FAIL(exec_ctx->get_sqludt_meta_by_subschema_id(subschema_id, value))) { + LOG_WARN("failed to get subschema ctx", K(ret)); + } else if (OB_ISNULL(value.value_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("subschema is null", K(ret)); + } else { + coll_info = reinterpret_cast(value.value_); + ObCollectionTypeBase *coll_meta = coll_info->collection_meta_; + ObColumnRefRawExpr *attr_expr = NULL; + if (coll_meta->type_id_ != ObNestedType::OB_ARRAY_TYPE && coll_meta->type_id_ != ObNestedType::OB_VECTOR_TYPE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected meta type", K(ret), K(coll_meta->type_id_)); + } else if (OB_ISNULL(coll_meta)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("subschema is null", K(ret)); + } else if (need_construct_attrs) { + if 
(OB_FAIL(ObRawExprUtils::create_attr_expr(expr_factory_, my_session_, + T_REF_COLUMN, ArrayAttr::ATTR_LENGTH, + attr_expr))) { + LOG_WARN("failed to create nullbitmap attr expr", K(ret)); + } else if (OB_FAIL(expr.add_attr_expr(attr_expr))) { + LOG_WARN("failed to add attr expr", K(ret)); + } else if (OB_FAIL(add_attr_exprs(reinterpret_cast(coll_meta)->element_type_, expr))) { + LOG_WARN("failed to add attr expr", K(ret)); + } + } + if (OB_SUCC(ret) && need_set_values) { + ObString def = coll_info->get_def_string(); + ObSEArray enum_set_values; + if (OB_FAIL(enum_set_values.push_back(def))) { + LOG_WARN("failed to push back array", K(ret)); + } else if (OB_FAIL(expr.set_enum_set_values(enum_set_values))) { + LOG_WARN("failed to set values", K(ret)); + } + } + } + return ret; +} + int ObRawExprDeduceType::visit(ObOpRawExpr &expr) { int ret = OB_SUCCESS; @@ -1699,6 +1853,36 @@ int ObRawExprDeduceType::visit(ObAggFunRawExpr &expr) scale_increment_recover = result_type.get_scale(); result_type.set_scale(static_cast(result_type.get_scale() + scale_increment)); } + } else if (ob_is_collection_sql_type(obj_type)) { + if (T_FUN_SUM == expr.get_expr_type() || T_FUN_AVG == expr.get_expr_type()) { + ObSQLSessionInfo *session = const_cast(my_session_); + ObExecContext *exec_ctx = OB_ISNULL(session) ? 
NULL : session->get_cur_exec_ctx(); + uint16_t subschema_id = result_type.get_subschema_id(); + ObSubSchemaValue value; + const ObSqlCollectionInfo *coll_info = NULL; + if (OB_ISNULL(exec_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("need context to search subschema mapping", K(ret), K(subschema_id)); + } else if (OB_FAIL(exec_ctx->get_sqludt_meta_by_subschema_id(subschema_id, value))) { + LOG_WARN("failed to get subschema ctx", K(ret)); + } else if (value.type_ >= OB_SUBSCHEMA_MAX_TYPE) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid subschema type", K(ret), K(value)); + } else if (FALSE_IT(coll_info = reinterpret_cast(value.value_))) { + } else if (coll_info->collection_meta_->type_id_ == ObNestedType::OB_VECTOR_TYPE) { + result_type.set_collection(subschema_id); + expr.set_result_type(result_type); + if (OB_FAIL(construct_collecton_attr_expr(expr))) { + LOG_WARN("failed to construct collection attr expr", K(ret)); + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported collection type", K(ret), "type", coll_info->collection_meta_->type_id_); + } + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Incorrect collection arguments", K(child_expr->get_data_type()), K(ret)); + } } else { if (ob_is_number_tc(obj_type)) { result_type.set_number(); @@ -2500,8 +2684,13 @@ int ObRawExprDeduceType::visit(ObSysFunRawExpr &expr) int ObRawExprDeduceType::visit(ObSetOpRawExpr &expr) { - UNUSED(expr); int ret = OB_SUCCESS; + if (ob_is_collection_sql_type(expr.get_result_type().get_type()) + && !ObObjUDTUtil::ob_is_supported_sql_udt(expr.get_result_type().get_udt_id())) { + if (OB_FAIL(construct_collecton_attr_expr(expr))) { + LOG_WARN("failed to construct collection attr expr", K(ret)); + } + } return ret; } @@ -2607,6 +2796,11 @@ int ObRawExprDeduceType::visit(ObWinFunRawExpr &expr) LOG_WARN("deduce type failed", K(ret)); } else { expr.set_result_type(expr.get_agg_expr()->get_result_type()); + if (expr.get_result_type().is_collection_sql_type()) { + if 
(OB_FAIL(expr.set_enum_set_values(expr.get_agg_expr()->get_enum_set_values()))) { + LOG_WARN("failed to set_enum_set_values", K(ret)); + } + } } //here pl_agg_udf_expr_ in win_expr must be null, defensive check!!! } else if (OB_UNLIKELY(expr.get_pl_agg_udf_expr() != NULL)) { @@ -3295,6 +3489,9 @@ int ObRawExprDeduceType::set_agg_min_max_result_type(ObAggFunRawExpr &expr, } else if (OB_UNLIKELY(ob_is_roaringbitmap(child_expr->get_data_type()))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("Incorrect roaringbitmap arguments", K(child_expr->get_data_type()), K(ret)); + } else if (OB_UNLIKELY(ob_is_collection_sql_type(child_expr->get_data_type()))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Incorrect collection arguments", K(child_expr->get_data_type()), K(ret)); } else if (OB_UNLIKELY(ob_is_enumset_tc(child_expr->get_data_type()))) { // To compatible with MySQL, we need to add cast expression that enumset to varchar // to evalute MIN/MAX aggregate functions. diff --git a/src/sql/resolver/expr/ob_raw_expr_deduce_type.h b/src/sql/resolver/expr/ob_raw_expr_deduce_type.h index 621533f886..572e914f39 100644 --- a/src/sql/resolver/expr/ob_raw_expr_deduce_type.h +++ b/src/sql/resolver/expr/ob_raw_expr_deduce_type.h @@ -14,6 +14,7 @@ #define _OB_RAW_EXPR_DEDUCE_TYPE_H 1 #include "sql/resolver/expr/ob_raw_expr.h" #include "lib/container/ob_iarray.h" +#include "lib/udt/ob_collection_type.h" #include "common/ob_accuracy.h" #include "share/ob_i_sql_expression.h" namespace oceanbase @@ -84,6 +85,10 @@ private: int push_back_types(const ObRawExpr *param_expr, ObExprResTypes &types); int calc_result_type(ObNonTerminalRawExpr &expr, ObIExprResTypes &types, common::ObCastMode &cast_mode, int32_t row_dimension); + template + int construct_collecton_attr_expr(RawExprType &expr); + template + int add_attr_exprs(const ObCollectionTypeBase *coll_meta, RawExprType &expr); int calc_result_type_with_const_arg( ObNonTerminalRawExpr &expr, ObIExprResTypes &types, diff --git 
a/src/sql/resolver/expr/ob_raw_expr_info_extractor.cpp b/src/sql/resolver/expr/ob_raw_expr_info_extractor.cpp index 48a80c5940..1b58a3301b 100644 --- a/src/sql/resolver/expr/ob_raw_expr_info_extractor.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_info_extractor.cpp @@ -480,6 +480,7 @@ int ObRawExprInfoExtractor::visit(ObSysFunRawExpr &expr) } else { // these functions should not be calculated first if (T_FUN_SYS_AUTOINC_NEXTVAL == expr.get_expr_type() + || T_FUN_SYS_VEC_VID == expr.get_expr_type() || T_FUN_SYS_DOC_ID == expr.get_expr_type() || T_FUN_SYS_TABLET_AUTOINC_NEXTVAL == expr.get_expr_type() || T_FUN_SYS_SLEEP == expr.get_expr_type() diff --git a/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp b/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp index 721269f99e..6aff2861a0 100644 --- a/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp @@ -1298,10 +1298,15 @@ int ObRawExprResolverImpl::do_recursive_resolve(const ParseNode *node, case T_FUN_SYS_MULTILINESTRING: case T_FUN_SYS_POLYGON: case T_FUN_SYS_MULTIPOLYGON: - case T_FUN_SYS_GEOMCOLLECTION: { + case T_FUN_SYS_GEOMCOLLECTION: + case T_FUN_SYS_ARRAY: { OZ (process_geo_func_node(node, expr)); break; } + case T_FUN_SYS_VECTOR_DISTANCE: { + OZ (process_vector_func_node(node, expr)); + break; + } case T_FUN_SYS_XML_ELEMENT: { if (OB_FAIL(process_xml_element_node(node, expr))) { LOG_WARN("failed to process xmlelement node", K(ret)); @@ -1356,6 +1361,12 @@ int ObRawExprResolverImpl::do_recursive_resolve(const ParseNode *node, } break; } + case T_FUNC_SYS_ARRAY_CONTAINS: { + if (OB_FAIL(process_array_contains_node(node, expr))) { + LOG_WARN("fail to process sql udt access attr node", K(ret), K(node)); + } + break; + } default: ret = OB_ERR_PARSER_SYNTAX; LOG_WARN("Wrong type in expression", K(get_type_name(node->type_))); @@ -1496,6 +1507,49 @@ int ObRawExprResolverImpl::process_sql_udt_construct_node(const ParseNode *node, return ret; } +int 
ObRawExprResolverImpl::process_array_contains_node(const ParseNode *node, ObRawExpr *&expr) +{ + int ret = OB_SUCCESS; + ObSysFunRawExpr *func_expr = NULL; + if (OB_ISNULL(node)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(node)); + } else if (OB_UNLIKELY(2 != node->num_child_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected param num", K(node)); + } else if (OB_FAIL(ctx_.expr_factory_.create_raw_expr(node->type_, func_expr))) { + LOG_WARN("fail to create raw expr", K(ret)); + } else { + func_expr->set_func_name(N_ARRAY_CONTAINS); + if (OB_ISNULL(node->children_[1]) || OB_UNLIKELY(T_EXPR_LIST != node->children_[1]->type_)) { /* reject a missing list node before it is dereferenced below */ + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid children for array contains function", K(node), K(node->children_[1])); + } else if (OB_UNLIKELY(1 != node->children_[1]->num_child_)) { + ret = OB_ERR_PARAM_SIZE; + LOG_WARN("invalid children for array contains function", K(node), K(node->children_[1]->num_child_)); + } else { + ObRawExpr *para_expr = NULL; + if (OB_FAIL(SMART_CALL(recursive_resolve(node->children_[0], para_expr)))) { + LOG_WARN("fail to recursive resolve expr list item", K(ret)); + } else if (OB_FAIL(func_expr->add_param_expr(para_expr))) { + LOG_WARN("fail to add param expr to expr", K(ret)); + } else if (OB_ISNULL(node->children_[1]->children_[0])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid expr list node children", K(ret), K(node->children_[0])); + } else if (OB_FAIL(SMART_CALL(recursive_resolve(node->children_[1]->children_[0], para_expr)))) { + LOG_WARN("fail to recursive resolve expr list item", K(ret)); + } else if (OB_FAIL(func_expr->add_param_expr(para_expr))) { + LOG_WARN("fail to add param expr to expr", K(ret)); + } + } + } + if (OB_SUCC(ret)) { + func_expr->set_extra(1); // param order is reversed, so set extra to 1 + expr = func_expr; + } + return ret; +} + int ObRawExprResolverImpl::process_sql_udt_attr_access_node(const ParseNode *node, ObRawExpr *&expr) { INIT_SUCC(ret); @@ -3300,6 +3354,35 @@ int
ObRawExprResolverImpl::process_char_charset_node(const ParseNode *node, ObRa return ret; } +int ObRawExprResolverImpl::process_vector_func_node(const ParseNode *node, ObRawExpr *&expr) +{ + int ret = OB_SUCCESS; + ObSysFunRawExpr *func_expr = NULL; + if (OB_ISNULL(node)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(node)); + } else if (OB_FAIL(ctx_.expr_factory_.create_raw_expr(node->type_, func_expr))) { + LOG_WARN("fail to create raw expr", K(ret)); + } else { + func_expr->set_func_name(N_VECTOR_DISTANCE); + for (int64_t i = 0; OB_SUCC(ret) && i < node->num_child_; ++i) { + ObRawExpr *para_expr = NULL; + if (OB_ISNULL(node->children_[i])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid expr list node children", K(ret), K(i), K(node->children_[i])); + } else if (OB_FAIL(SMART_CALL(recursive_resolve(node->children_[i], para_expr)))) { + LOG_WARN("fail to recursive resolve expr list item", K(ret)); + } else if (OB_FAIL(func_expr->add_param_expr(para_expr))) { + LOG_WARN("fail to add param expr to expr", K(ret)); + } + } + } + if (OB_SUCC(ret)) { + expr = func_expr; + } + return ret; +} + int ObRawExprResolverImpl::set_geo_func_name(ObSysFunRawExpr *func_expr, const ObItemType func_type) { @@ -3338,6 +3421,10 @@ int ObRawExprResolverImpl::set_geo_func_name(ObSysFunRawExpr *func_expr, OX(func_expr->set_func_name(N_GEOMCOLLECTION)); break; } + case T_FUN_SYS_ARRAY: { + OX(func_expr->set_func_name(N_ARRAY)); + break; + } default: { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid geometry function", K(ret), K(func_type)); diff --git a/src/sql/resolver/expr/ob_raw_expr_resolver_impl.h b/src/sql/resolver/expr/ob_raw_expr_resolver_impl.h index 1472d3f24e..4a0e8ab58b 100644 --- a/src/sql/resolver/expr/ob_raw_expr_resolver_impl.h +++ b/src/sql/resolver/expr/ob_raw_expr_resolver_impl.h @@ -161,6 +161,7 @@ private: ObRawExpr *&date_unit_expr); int process_geo_func_node(const ParseNode *node, ObRawExpr *&expr); int set_geo_func_name(ObSysFunRawExpr *func_expr, 
const ObItemType func_type); + int process_vector_func_node(const ParseNode *node, ObRawExpr *&expr); bool is_win_expr_valid_scope(ObStmtScope scope) const; int check_and_canonicalize_window_expr(ObRawExpr *expr); int process_ident_node(const ParseNode &node, ObRawExpr *&expr); @@ -217,7 +218,7 @@ private: int resolve_dblink_udf_expr(const ParseNode *node, ObQualifiedName &column_ref, ObRawExpr *&expr); - + int process_array_contains_node(const ParseNode *node, ObRawExpr *&expr); private: int process_sys_func_params(ObSysFunRawExpr &func_expr, int current_columns_count); int transform_ratio_afun_to_arg_div_sum(const ParseNode *ratio_to_report, ParseNode *&div); diff --git a/src/sql/resolver/expr/ob_raw_expr_util.cpp b/src/sql/resolver/expr/ob_raw_expr_util.cpp index 795010c4f6..cde2780d6f 100644 --- a/src/sql/resolver/expr/ob_raw_expr_util.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_util.cpp @@ -21,6 +21,7 @@ #include "lib/string/ob_sql_string.h" #include "lib/json/ob_json.h" #include "lib/json/ob_json_print_utils.h" +#include "lib/udt/ob_array_type.h" #include "sql/resolver/dml/ob_select_stmt.h" #include "sql/resolver/expr/ob_raw_expr.h" #include "sql/resolver/expr/ob_raw_expr_resolver_impl.h" @@ -40,6 +41,7 @@ #include "sql/optimizer/ob_optimizer_util.h" #include "sql/resolver/dml/ob_dml_resolver.h" #include "sql/resolver/dml/ob_select_resolver.h" +#include "sql/resolver/expr/ob_raw_expr_deduce_type.h" namespace oceanbase { @@ -4290,6 +4292,30 @@ int ObRawExprUtils::implict_cast_sql_udt_to_pl_udt(ObRawExprFactory *expr_factor return ret; } +template +int ObRawExprUtils::create_attr_expr(ObRawExprFactory *expr_factory, + const ObSQLSessionInfo *session, + ObItemType expr_type, + ArrayAttr attr_type, + RawExprType* &attr_expr) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(expr_factory->create_raw_expr(expr_type, attr_expr))) { + LOG_WARN("create raw expr failed", K(ret)); + } else if (OB_ISNULL(attr_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("attr expr is null"); + 
} else if (OB_FAIL(attr_expr->add_flag(IS_ATTR_EXPR))) { + LOG_WARN("attr expr add flag failed"); + } else if (attr_type == ArrayAttr::ATTR_LENGTH && FALSE_IT(attr_expr->set_data_type(ObUInt32Type))) { + } else if ((attr_type == ArrayAttr::ATTR_NULL_BITMAP || attr_type == ArrayAttr::ATTR_OFFSETS ||attr_type == ArrayAttr::ATTR_DATA) && + FALSE_IT(attr_expr->set_data_type(ObVarcharType))) { + } else if (OB_FAIL(attr_expr->formalize(session))) { + LOG_WARN("failed to formalize expr", K(ret)); + } + return ret; +} + int ObRawExprUtils::create_cast_expr(ObRawExprFactory &expr_factory, ObRawExpr *src_expr, const ObExprResType &dst_type, @@ -5140,7 +5166,8 @@ int ObRawExprUtils::create_param_expr(ObRawExprFactory &expr_factory, int64_t pa if (expr->is_bool_expr()) { c_expr->set_is_literal_bool(true); } - if (ob_is_enumset_tc(expr->get_result_type().get_type())) { + if (ob_is_enumset_tc(expr->get_result_type().get_type()) + || ob_is_collection_sql_type(expr->get_result_type().get_type())) { if (OB_FAIL(c_expr->set_enum_set_values(expr->get_enum_set_values()))) { LOG_WARN("failed to set enum_set_values", K(*expr), K(ret)); } @@ -5197,6 +5224,10 @@ bool ObRawExprUtils::check_exprs_type_collation_accuracy_equal(const ObRawExpr * && expr1->get_collation_type() == expr2->get_collation_type() && expr1->get_accuracy() == expr2->get_accuracy()) { equal = true; + if (ob_is_collection_sql_type(expr1->get_data_type()) + && expr1->get_subschema_id() != expr2->get_subschema_id()) { + equal = false; + } } return equal; } @@ -5270,7 +5301,8 @@ int ObRawExprUtils::build_column_conv_expr(ObRawExprFactory &expr_factory, if (col_ref.is_fulltext_column() || col_ref.is_spatial_generated_column() || col_ref.is_multivalue_generated_column() || - col_ref.is_multivalue_generated_array_column()) { + col_ref.is_multivalue_generated_array_column() || + col_ref.is_vec_index_column()) { // 全文列不会破坏约束性,且数据不会存储,跳过强转 // 空间索引列是虚拟列,跳过强转 } else if (OB_FAIL(build_column_conv_expr(session_info, @@ -5405,7 
+5437,7 @@ int ObRawExprUtils::build_column_conv_expr(const ObSQLSessionInfo *session_info, } } if (OB_SUCC(ret)) { - if (ob_is_enumset_tc(dest_type) && OB_NOT_NULL(type_infos)) { + if ((ob_is_enumset_tc(dest_type) || ob_is_collection_sql_type(dest_type)) && OB_NOT_NULL(type_infos)) { ObExprColumnConv *column_conv = NULL; ObExprOperator *op = NULL; if (OB_ISNULL(op = f_expr->get_op())) { @@ -7079,7 +7111,7 @@ int ObRawExprUtils::init_column_expr(const ObColumnSchemaV2 &column_schema, ObCo LOG_WARN("extract column expr info failed", K(ret)); } } - if (OB_SUCC(ret) && column_schema.is_enum_or_set()) { + if (OB_SUCC(ret) && (column_schema.is_enum_or_set() || column_schema.is_collection())) { if (OB_FAIL(column_expr.set_enum_set_values(column_schema.get_extended_type_info()))) { LOG_WARN("failed to set enum set values", K(ret)); } @@ -7637,6 +7669,17 @@ int ObRawExprUniqueSet::flatten_and_add_raw_exprs(const ObRawExprUniqueSet &raw_ return flatten_and_add_raw_exprs(raw_exprs.get_expr_array(), filter, need_flatten_gen_col); } +int ObRawExprUniqueSet::flatten_and_add_attr_exprs(ObRawExpr *raw_expr) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; i < raw_expr->get_attr_count() && OB_SUCC(ret); i++) { + if (OB_FAIL(append(raw_expr->get_attr_expr(i)))) { + LOG_WARN("fail to push raw expr", K(ret), KPC(raw_expr)); + } + } + return ret; +} + int ObRawExprUniqueSet::flatten_and_add_raw_exprs(ObRawExpr *raw_expr, bool need_flatten_gen_col, std::function filter) @@ -7682,6 +7725,11 @@ int ObRawExprUniqueSet::flatten_and_add_raw_exprs(ObRawExpr *raw_expr, LOG_WARN("fail to flatten raw expr", K(ret), K(d_v_raw_expr)); } } + + if (OB_SUCC(ret) && ob_is_collection_sql_type(raw_expr->get_result_type().get_type()) + && OB_FAIL(flatten_and_add_attr_exprs(raw_expr))) { /* OB_FAIL stores the error in ret so the failure is returned, not only logged */ + LOG_WARN("fail to add attr raw expr", K(ret)); + } // flatten dependent expr if (OB_SUCC(ret) && T_REF_COLUMN == raw_expr->get_expr_type()) { ObColumnRefRawExpr *col_expr = static_cast(raw_expr); @@ -8189,6 +8237,10 @@ int
ObRawExprUtils::check_need_cast_expr(const ObExprResType &src_type, } else if (ob_is_decimal_int(in_type) && decimal_int_need_cast(src_type.get_accuracy(), dst_type.get_accuracy())) { need_cast = true; + } else if (ob_is_collection_sql_type(in_type) + && src_type.get_subschema_id() != dst_type.get_subschema_id()) { + // array element cast + need_cast = true; } // mark as scale adjust cast to avoid repeat cast error if (need_cast) { @@ -8348,6 +8400,12 @@ int ObRawExprUtils::create_type_expr(ObRawExprFactory &expr_factory, dst_expr->set_udt_id(T_OBJ_XML); } else { dst_expr->set_udt_id(dst_type.get_udt_id()); + if (!dst_type.is_ext()) { + // udt or collection type, add subschema id + uint16_t subschema_id = dst_type.get_subschema_id(); + parse_node.int16_values_[OB_NODE_CAST_COLL_IDX] = ((subschema_id >> 8) & UINT_MAX8); + parse_node.int16_values_[OB_NODE_CAST_CS_LEVEL_IDX] = (subschema_id & UINT_MAX8); + } } } diff --git a/src/sql/resolver/expr/ob_raw_expr_util.h b/src/sql/resolver/expr/ob_raw_expr_util.h index 1da9a8885f..c61cd0e711 100644 --- a/src/sql/resolver/expr/ob_raw_expr_util.h +++ b/src/sql/resolver/expr/ob_raw_expr_util.h @@ -23,6 +23,7 @@ #include "sql/resolver/ob_resolver_utils.h" #include "lib/hash/ob_hashset.h" #include "lib/allocator/ob_allocator.h" +#include "lib/udt/ob_array_type.h" #include "share/schema/ob_trigger_info.h" namespace oceanbase @@ -138,6 +139,7 @@ private: bool need_flatten_gen_col = true, std::function filter = [](ObRawExpr *e){ return NULL != e;}); + int flatten_and_add_attr_exprs(ObRawExpr *raw_expr); DISALLOW_COPY_AND_ASSIGN(ObRawExprUniqueSet); private: ObSEArray expr_array_; @@ -483,6 +485,12 @@ public: static int implict_cast_sql_udt_to_pl_udt(ObRawExprFactory *expr_factory, const ObSQLSessionInfo *session, ObRawExpr* &real_ref_expr); + template + static int create_attr_expr(ObRawExprFactory *expr_factory, + const ObSQLSessionInfo *session, + ObItemType expr_type, + ArrayAttr attr_type, + RawExprType* &attr_expr); // new 
engine: may create more cast exprs to handle non-system-collation string. // e.g.: utf16->number: utf16->utf8->number (two cast expr) // utf8_bin->number: utf8->number (just one cat expr) diff --git a/src/sql/resolver/ob_resolver_utils.cpp b/src/sql/resolver/ob_resolver_utils.cpp index b419345026..a07cfaca57 100644 --- a/src/sql/resolver/ob_resolver_utils.cpp +++ b/src/sql/resolver/ob_resolver_utils.cpp @@ -409,7 +409,93 @@ int ObResolverUtils::add_dependency_synonym_object(share::schema::ObSchemaGetter } } } + return ret; +} +inline bool ObResolverUtils::is_collection_support_type(const ObObjType type) +{ + return (type == ObTinyIntType || type == ObSmallIntType || + type == ObInt32Type || type == ObIntType || + type == ObUTinyIntType || type == ObUSmallIntType || + type == ObUInt32Type || type == ObUInt64Type || + type == ObFloatType || type == ObDoubleType || + type == ObVarcharType || type == ObCollectionSQLType); +} + +int ObResolverUtils::resolve_collection_type_info(const ParseNode &type_node, ObStringBuffer &buf, uint8_t &depth, bool is_vector_child) +{ + int ret = OB_SUCCESS; + bool is_stack_overflow = false; + const uint8_t OB_ARRAY_MAX_NESTED_LEVEL = 6; /* constistent with pg*/ + bool is_vector = (type_node.type_ == T_COLLECTION && type_node.int32_values_[0] == 1); + if (OB_FAIL(check_stack_overflow(is_stack_overflow))) { + LOG_WARN("check stack overflow failed", K(ret)); + } else if (is_stack_overflow) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("too deep recursive", K(ret)); + } else if (type_node.type_ == T_COLLECTION && ++depth > OB_ARRAY_MAX_NESTED_LEVEL) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported array depth", K(ret), K(depth)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "ARRAY DEPTH exceeds the maximum allowed(6)"); + } else if (!is_collection_support_type(static_cast(type_node.type_))) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported element type", K(ret), K(type_node.type_)); + } else if (type_node.int32_values_[1]/*is binary*/ && 
(type_node.type_ == T_CHAR || type_node.type_ == T_VARCHAR)) { + if (OB_FAIL(buf.append(type_node.type_ == T_CHAR ? "BINARY" : "VARBINARY"))) { + LOG_WARN("failed to append type string", K(ret), K(type_node.type_)); + } + } else if (is_vector) { + // vector type + if (OB_FAIL(buf.append("VECTOR"))) { + LOG_WARN("failed to append type string", K(ret), K(type_node.type_)); + } + } else if (!is_vector_child && OB_FAIL(buf.append(ob_sql_type_str(static_cast(type_node.type_))))) { + LOG_WARN("failed to append type string", K(ret), K(type_node.type_)); + } + + const int MAX_LEN = 128; + char tmp[MAX_LEN] = {0}; + if (OB_FAIL(ret)) { + } else if (type_node.type_ == T_CHAR || type_node.type_ == T_VARCHAR + || type_node.type_ == T_DATETIME || type_node.type_ == T_TIMESTAMP + || type_node.type_ == T_TIME || type_node.type_ == T_BIT) { + bool is_char = (type_node.type_ == T_CHAR || type_node.type_ == T_VARCHAR); + bool is_bit = type_node.type_ == T_BIT; + int32_t length = is_bit ? type_node.int16_values_[0] + : (is_char ? 
type_node.int32_values_[0] : type_node.int16_values_[1]); + int64_t pos = 0; + if (OB_FAIL(databuff_printf(tmp, MAX_LEN, pos, "(%d)",length))) { + LOG_WARN("failed to convert len to string", K(ret), K(length)); + } else if (OB_FAIL(buf.append(tmp, pos))) { + LOG_WARN("failed to append string length", K(ret), K(type_node.type_)); + } + } else if (type_node.type_ == T_INT && is_vector_child) { + // vector dimension + int64_t pos = 0; + if (type_node.value_ <= 0 || type_node.value_ > 16000) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported vector column dim range", K(ret), K(type_node.value_)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "vector column dim less or equal to zero or larger than 16000 is"); + } else if (OB_FAIL(databuff_printf(tmp, MAX_LEN, pos, "%ld", type_node.value_))) { + LOG_WARN("failed to convert len to string", K(ret)); + } else if (OB_FAIL(buf.append(tmp, pos))) { + LOG_WARN("failed to append string length", K(ret), K(type_node.type_)); + } + } else if (type_node.type_ == T_NUMBER) { + int64_t pos = 0; + if (OB_FAIL(databuff_printf(tmp, MAX_LEN, pos, "(%d,%d)", type_node.int16_values_[0], type_node.int16_values_[1]))) { + LOG_WARN("failed to convert len to string", K(ret)); + } else if (OB_FAIL(buf.append(tmp, pos))) { + LOG_WARN("failed to append string length", K(ret), K(type_node.type_)); + } + } else if (type_node.type_ == T_COLLECTION && OB_FAIL(buf.append("("))) { + LOG_WARN("failed to append left bracket", K(ret), K(type_node.type_)); + } else if (type_node.type_ == T_COLLECTION && OB_NOT_NULL(type_node.children_[0]) + && OB_FAIL(resolve_collection_type_info(*type_node.children_[0], buf, depth, is_vector))) { + LOG_WARN("failed to resolve sub type info", K(ret), K(type_node.type_)); + } else if (type_node.type_ == T_COLLECTION && OB_FAIL(buf.append(")"))) { + LOG_WARN("failed to append right bracket", K(ret), K(type_node.type_)); + } return ret; } @@ -5235,6 +5321,23 @@ int ObResolverUtils::build_file_column_expr_for_file_url( return ret; } 
+int ObResolverUtils::generate_subschema_id(ObSQLSessionInfo &session_info, + const common::ObIArray &extended_type_info, + uint16_t &subschema_id) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(session_info.get_cur_exec_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("exec ctx is null", K(ret)); + } else if (extended_type_info.count() != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected type name", K(ret), K(extended_type_info.count())); + } else if (OB_FAIL(session_info.get_cur_exec_ctx()->get_subschema_id_by_type_string(extended_type_info.at(0), subschema_id))) { + LOG_WARN("failed to get array type subschema id", K(ret)); + } + return ret; +} + // 解析生成列表达式时,首先在table_schema中的column_schema中寻找依赖的列,如果找不到,再在 resolved_cols中找 int ObResolverUtils::resolve_generated_column_expr(ObResolverParams ¶ms, const ParseNode *node, @@ -5486,8 +5589,17 @@ int ObResolverUtils::resolve_generated_column_expr(ObResolverParams ¶ms, cast_dst_type.set_accuracy(generated_column.get_accuracy()); cast_dst_type.set_collation_level(CS_LEVEL_IMPLICIT); ObRawExpr *expr_with_implicit_cast = NULL; + if (ob_is_collection_sql_type(cast_dst_type.get_type())) { + uint16_t subschema_id = 0; + if (OB_FAIL(generate_subschema_id(*session_info, generated_column.get_extended_type_info(), subschema_id))) { + LOG_WARN("generate subschema id failed", K(ret)); + } else { + cast_dst_type.set_subschema_id(subschema_id); + } + } //only formalize once - if (OB_FAIL(ObRawExprUtils::erase_operand_implicit_cast(expr, expr))) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObRawExprUtils::erase_operand_implicit_cast(expr, expr))) { LOG_WARN("fail to remove implicit cast", K(ret)); } else if (coltype_not_defined) { expr_with_implicit_cast = expr; @@ -6790,6 +6902,21 @@ int ObResolverUtils::resolve_data_type(const ParseNode &type_node, } //} break; + case ObCollectionSQLTC: { + uint64_t tenant_data_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get tenant data version 
failed", K(ret)); + } else if (tenant_data_version < DATA_VERSION_4_3_3_0) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant version is less than 4.3.3, array type"); + } else { + data_type.set_length(length); + data_type.set_scale(default_accuracy.get_scale()); + data_type.set_charset_type(CHARSET_BINARY); + data_type.set_collation_type(CS_TYPE_INVALID); + } + break; + } case ObRoaringBitmapTC: { uint64_t tenant_data_version = 0; if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { diff --git a/src/sql/resolver/ob_resolver_utils.h b/src/sql/resolver/ob_resolver_utils.h index 0ebb450cc3..8c0a369793 100644 --- a/src/sql/resolver/ob_resolver_utils.h +++ b/src/sql/resolver/ob_resolver_utils.h @@ -157,6 +157,11 @@ public: static int resolve_extended_type_info(const ParseNode &str_list_node, ObIArray& type_info_array); + static int resolve_collection_type_info(const ParseNode &type_node, + ObStringBuffer &buf, + uint8_t &depth, + bool is_vector_child = false); + inline static bool is_collection_support_type(const ObObjType type); // type_infos is %ori_cs_type, need convert to %cs_type first static int check_extended_type_info(common::ObIAllocator &alloc, ObIArray &type_infos, @@ -888,6 +893,9 @@ public: static int64_t get_mysql_max_partition_num(const uint64_t tenant_id); static int check_schema_valid_for_mview(const share::schema::ObTableSchema &table_schema); + static int generate_subschema_id(ObSQLSessionInfo &session_info, + const common::ObIArray &extended_type_info, + uint16_t &subschema_id); static bool is_external_pseudo_column(const ObRawExpr &expr); static int cnt_external_pseudo_column(const ObRawExpr &expr, bool &contain); private: diff --git a/src/sql/resolver/ob_schema_checker.cpp b/src/sql/resolver/ob_schema_checker.cpp index 7e0de0a189..ad981cdb34 100644 --- a/src/sql/resolver/ob_schema_checker.cpp +++ b/src/sql/resolver/ob_schema_checker.cpp @@ -758,7 +758,8 @@ int ObSchemaChecker::get_simple_table_schema( int 
ObSchemaChecker::get_table_schema(const uint64_t tenant_id, const ObString &database_name, const ObString &table_name, const bool is_index_table, - const ObTableSchema *&table_schema) + const ObTableSchema *&table_schema, const bool with_hidden_flag, + const bool is_built_in_index) { int ret = OB_SUCCESS; table_schema = NULL; @@ -771,7 +772,7 @@ int ObSchemaChecker::get_table_schema(const uint64_t tenant_id, const ObString & ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(tenant_id), K(database_name), K(table_name), K(ret)); } else if (OB_FAIL(schema_mgr_->get_table_schema(tenant_id, database_name, table_name, - is_index_table, table))) { + is_index_table, table, with_hidden_flag, is_built_in_index))) { LOG_WARN("get table schema failed", K(tenant_id), K(database_name), K(table_name), K(ret)); } else if (NULL == table) { ret = OB_TABLE_NOT_EXIST; diff --git a/src/sql/resolver/ob_schema_checker.h b/src/sql/resolver/ob_schema_checker.h index 8ba7b9f383..991d820bf9 100644 --- a/src/sql/resolver/ob_schema_checker.h +++ b/src/sql/resolver/ob_schema_checker.h @@ -185,7 +185,9 @@ public: const common::ObString &database_name, const common::ObString &table_name, const bool is_index_table, - const share::schema::ObTableSchema *&table_schema); + const share::schema::ObTableSchema *&table_schema, + const bool with_hidden_flag = false, + const bool is_built_in_index = false); int get_table_schema(const uint64_t tenant_id, const uint64_t database_id, const common::ObString &table_name, diff --git a/src/sql/resolver/ob_stmt_resolver.cpp b/src/sql/resolver/ob_stmt_resolver.cpp index 224b233027..b9c5dffc20 100644 --- a/src/sql/resolver/ob_stmt_resolver.cpp +++ b/src/sql/resolver/ob_stmt_resolver.cpp @@ -126,7 +126,8 @@ int ObStmtResolver::resolve_table_relation_node_v2(const ParseNode *node, int tmp_ret = ObSQLUtils::check_and_convert_table_name(cs_type, perserve_lettercase, table_name); //因索引表存在前缀,故第1次检查table_name超长时,需要继续获取db信息以判断是否索引表 if (OB_SUCCESS == tmp_ret || 
OB_ERR_TOO_LONG_IDENT == tmp_ret - || (session_info_->get_ddl_info().is_ddl() && OB_WRONG_TABLE_NAME == tmp_ret)) { + || ((session_info_->get_ddl_info().is_ddl() || session_info_->get_ddl_info().is_dummy_ddl_for_inner_visibility()) && + OB_WRONG_TABLE_NAME == tmp_ret)) { if (NULL == db_node) { if (is_oracle_sys_view) { // ObString tmp(OB_ORA_SYS_SCHEMA_NAME); // right code diff --git a/src/sql/rewrite/ob_transformer_impl.cpp b/src/sql/rewrite/ob_transformer_impl.cpp index 0e87825af7..f156e73fd0 100644 --- a/src/sql/rewrite/ob_transformer_impl.cpp +++ b/src/sql/rewrite/ob_transformer_impl.cpp @@ -552,12 +552,43 @@ void ObTransformerImpl::print_trans_stat() } } + +int ObTransformerImpl::check_vec_approx(ObDMLStmt *stmt, bool &has_approx) +{ + int ret = OB_SUCCESS; + ObSEArray temp_table_infos; + if (OB_ISNULL(stmt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("stmt is NULL", K(ret)); + } else if (has_approx == true) { + // do nothing + } else if (stmt->has_vec_approx()) { + has_approx = true; + } else if (stmt->has_subquery()) { + int sub_query_size = stmt->get_subquery_expr_size(); + for (int64_t j = 0; OB_SUCC(ret) && j < sub_query_size && !has_approx; ++j) { + ObQueryRefRawExpr *subquery_ref = stmt->get_subquery_exprs().at(j); + ObDMLStmt *subquery_stmt = nullptr; + if (OB_ISNULL(subquery_ref) || + OB_ISNULL(subquery_stmt = subquery_ref->get_ref_stmt())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("subquery reference is null", K(subquery_ref)); + } else if (OB_FAIL(check_vec_approx(subquery_stmt, has_approx))) { + LOG_WARN("stored subquery reference stmt failed", K(ret)); + } + } + } + + return ret; +} + int ObTransformerImpl::choose_rewrite_rules(ObDMLStmt *stmt, uint64_t &need_types) { int ret = OB_SUCCESS; uint64_t disable_list = 0; StmtFunc func; ObSqlCtx *sql_ctx = NULL; + bool has_approx = false; if (OB_ISNULL(stmt) || OB_ISNULL(ctx_->exec_ctx_) || OB_ISNULL(sql_ctx = ctx_->exec_ctx_->get_sql_ctx())) { @@ -569,10 +600,12 @@ int 
ObTransformerImpl::choose_rewrite_rules(ObDMLStmt *stmt, uint64_t &need_type LOG_WARN("failed to check stmt functions", K(ret)); } else if (OB_FAIL(check_temp_table_functions(stmt, func))) { LOG_WARN("failed to check stmt functions", K(ret)); + } else if (OB_FAIL(check_vec_approx(stmt, has_approx))) { + LOG_WARN("failed to check vec approx", K(ret)); } else { //TODO::unpivot open @xifeng if (func.contain_unpivot_query_ || func.contain_enum_set_values_ || func.contain_geometry_values_ || - func.contain_fulltext_search_ || func.contain_dml_with_doc_id_) { + func.contain_fulltext_search_ || func.contain_dml_with_doc_id_ || has_approx) { disable_list = ObTransformRule::ALL_TRANSFORM_RULES; } if (func.contain_dml_with_doc_id_) { diff --git a/src/sql/rewrite/ob_transformer_impl.h b/src/sql/rewrite/ob_transformer_impl.h index f20892e887..363965e641 100644 --- a/src/sql/rewrite/ob_transformer_impl.h +++ b/src/sql/rewrite/ob_transformer_impl.h @@ -153,6 +153,7 @@ public: }; static int check_stmt_functions(const ObDMLStmt *stmt, StmtFunc &func); int check_temp_table_functions(ObDMLStmt *stmt, StmtFunc &func); + int check_vec_approx(ObDMLStmt *stmt, bool &has_approx); inline ObTransformerCtx *get_trans_ctx() { return ctx_; } int set_transformation_parameters(ObQueryCtx *query_ctx); private: diff --git a/src/sql/session/ob_basic_session_info.cpp b/src/sql/session/ob_basic_session_info.cpp index 0e6d5cb7de..009c118de9 100644 --- a/src/sql/session/ob_basic_session_info.cpp +++ b/src/sql/session/ob_basic_session_info.cpp @@ -3868,6 +3868,11 @@ int ObBasicSessionInfo::get_show_ddl_in_compat_mode(bool &show_ddl_in_compat_mod return get_bool_sys_var(SYS_VAR__SHOW_DDL_IN_COMPAT_MODE, show_ddl_in_compat_mode); } +int ObBasicSessionInfo::get_ob_hnsw_ef_search(uint64_t &ob_hnsw_ef_search) const +{ + return get_uint64_sys_var(SYS_VAR_OB_HNSW_EF_SEARCH, ob_hnsw_ef_search); +} + int ObBasicSessionInfo::get_sql_quote_show_create(bool &sql_quote_show_create) const { return 
get_bool_sys_var(SYS_VAR_SQL_QUOTE_SHOW_CREATE, sql_quote_show_create); diff --git a/src/sql/session/ob_basic_session_info.h b/src/sql/session/ob_basic_session_info.h index 80a31a4ae5..5040f7a500 100644 --- a/src/sql/session/ob_basic_session_info.h +++ b/src/sql/session/ob_basic_session_info.h @@ -569,6 +569,7 @@ public: common::ObIArray& get_enable_role_ids() { return enable_role_ids_; } const common::ObIArray& get_enable_role_ids() const { return enable_role_ids_; } int get_show_ddl_in_compat_mode(bool &show_ddl_in_compat_mode) const; + int get_ob_hnsw_ef_search(uint64_t &ob_hnsw_ef_search) const; int get_sql_quote_show_create(bool &sql_quote_show_create) const; common::ObConsistencyLevel get_consistency_level() const { return consistency_level_; }; bool is_zombie() const { return SESSION_KILLED == get_session_state();} diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 44ae7d35d3..b4817fab65 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -572,6 +572,7 @@ ob_set_subtarget(ob_storage access ob_set_subtarget(ob_storage ddl ddl/ob_build_index_task.cpp ddl/ob_complement_data_task.cpp + ddl/ob_delete_lob_meta_row_task.cpp ddl/ob_ddl_clog.cpp ddl/ob_ddl_merge_task.cpp ddl/ob_ddl_redo_log_replayer.cpp @@ -881,6 +882,13 @@ ob_set_subtarget(ob_storage mview mview/ob_major_mv_merge_info.cpp ) +ob_set_subtarget(ob_storage vector_index + vector_index/cmd/ob_vector_refresh_index_executor.cpp + vector_index/ob_vector_index_refresh.cpp + vector_index/ob_vector_refresh_idx_transaction.cpp + vector_index/ob_vector_index_sched_job_utils.cpp +) + ob_set_subtarget(ob_storage tenant_snapshot tenant_snapshot/ob_tenant_snapshot_service.cpp tenant_snapshot/ob_tenant_clone_service.cpp diff --git a/src/storage/access/ob_pushdown_aggregate.cpp b/src/storage/access/ob_pushdown_aggregate.cpp index fb1670b45a..1264845949 100644 --- a/src/storage/access/ob_pushdown_aggregate.cpp +++ b/src/storage/access/ob_pushdown_aggregate.cpp @@ -20,6 
+20,7 @@ #include "storage/blocksstable/encoding/ob_micro_block_decoder.h" #include "storage/lob/ob_lob_manager.h" #include "sql/engine/expr/ob_datum_cast.h" +#include "sql/engine/expr/ob_array_expr_utils.h" namespace oceanbase { @@ -1975,7 +1976,8 @@ ObSumAggCell::ObSumAggCell(const ObAggCellBasicInfo &basic_info, common::ObIAllo copy_datum_func_(nullptr), cast_datum_(), sum_temp_buffer_(nullptr), - cast_temp_buffer_(nullptr) + cast_temp_buffer_(nullptr), + datum_allocator_("ObStorageAgg", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()) { agg_type_ = ObPDAggType::PD_SUM; result_datum_.set_null(); @@ -2004,6 +2006,7 @@ void ObSumAggCell::reset() } cast_datum_.reset(); ObAggCell::reset(); + datum_allocator_.reset(); } void ObSumAggCell::reuse() @@ -2016,6 +2019,7 @@ void ObSumAggCell::reuse() sum_use_int_flag_ = false; num_int_ = 0; // reset_aggregate_info(); + datum_allocator_.reset(); } void ObSumAggCell::clear_group_by_info() @@ -2151,6 +2155,13 @@ int ObSumAggCell::init(const bool is_group_by, sql::ObEvalCtx *eval_ctx) ret = ObSumAggCell::init_decimal_int_func(); break; } + case ObObjTypeClass::ObCollectionSQLTC: { + eval_func_ = &ObSumAggCell::eval_vector; + eval_batch_func_ = &ObSumAggCell::eval_vector_batch; + copy_datum_func_ = &ObSumAggCell::copy_vector; + eval_skip_index_func_ = &ObSumAggCell::eval_vector; + break; + } default: { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected type", K(ret), K(obj_tc_)); @@ -2571,6 +2582,13 @@ int ObSumAggCell::copy_number(const ObDatum &datum, ObDatum &result_datum) return ret; } +int ObSumAggCell::copy_vector(const ObDatum &datum, ObDatum &result_datum) +{ + int ret = OB_SUCCESS; + result_datum.set_string(datum.get_string()); + return ret; +} + template int ObSumAggCell::copy_decimal_int(const ObDatum &datum, ObDatum &result_datum) { @@ -2723,6 +2741,21 @@ int ObSumAggCell::eval_number(const common::ObDatum &datum, const int32_t datum_ return ret; } +int ObSumAggCell::eval_vector(const common::ObDatum &datum, const int32_t 
datum_offset) +{ + int ret = OB_SUCCESS; + common::ObDatum &result_datum = get_group_by_result_datum(datum_offset); + if (datum.is_null()) { + } else if (result_datum.is_null()) { + if (OB_FAIL(result_datum.deep_copy(datum, datum_allocator_))) { + LOG_WARN("fail to deep copy datum", K(ret)); + } + } else if (OB_FAIL(ObArrayExprUtils::vector_datum_add(result_datum, datum, datum_allocator_))){ + LOG_WARN("fail to add vector", K(ret)); + } + return ret; +} + int ObSumAggCell::init_eval_skip_index_func_for_decimal() { int ret = OB_SUCCESS; @@ -2898,6 +2931,17 @@ int ObSumAggCell::eval_number_batch(const common::ObDatum *datums, const int64_t return ret; } +int ObSumAggCell::eval_vector_batch(const common::ObDatum *datums, const int64_t count) +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < count; ++i) { + if (OB_FAIL(eval_vector(datums[i], -1))) { + LOG_WARN("Failed to eval float", K(ret)); + } + } + return ret; +} + template int ObSumAggCell::eval_decimal_int_batch(const common::ObDatum *datums, const int64_t count) { diff --git a/src/storage/access/ob_pushdown_aggregate.h b/src/storage/access/ob_pushdown_aggregate.h index e56f6b9573..b2b23f40e3 100644 --- a/src/storage/access/ob_pushdown_aggregate.h +++ b/src/storage/access/ob_pushdown_aggregate.h @@ -572,6 +572,7 @@ private: int eval_float(const common::ObDatum &datum, const int32_t datum_offset); int eval_double(const common::ObDatum &datum, const int32_t datum_offset); int eval_number(const common::ObDatum &datum, const int32_t datum_offset); + int eval_vector(const common::ObDatum &datum, const int32_t datum_offset); template int eval_number_decimal_int(const common::ObDatum &datum, const int32_t datum_offset); int init_eval_skip_index_func_for_decimal(); @@ -586,6 +587,7 @@ private: int eval_float_batch(const common::ObDatum *datums, const int64_t count); int eval_double_batch(const common::ObDatum *datums, const int64_t count); int eval_number_batch(const common::ObDatum *datums, const 
int64_t count); + int eval_vector_batch(const common::ObDatum *datums, const int64_t count); template int eval_decimal_int_batch(const common::ObDatum *datums, const int64_t count); template @@ -609,6 +611,7 @@ private: int copy_float(const ObDatum &datum, ObDatum &result_datum); int copy_double(const ObDatum &datum, ObDatum &result_datum); int copy_number(const ObDatum &datum, ObDatum &result_datum); + int copy_vector(const ObDatum &datum, ObDatum &result_datum); template int copy_decimal_int(const ObDatum &datum, ObDatum &result_datum); template @@ -640,6 +643,7 @@ private: blocksstable::ObStorageDatum cast_datum_; char *sum_temp_buffer_; char *cast_temp_buffer_; + common::ObArenaAllocator datum_allocator_; }; // mysql compatibility, select a,count(a), output first value of a diff --git a/src/storage/blocksstable/cs_encoding/ob_micro_block_cs_decoder.cpp b/src/storage/blocksstable/cs_encoding/ob_micro_block_cs_decoder.cpp index dfa3f8c970..6afe3aaee6 100644 --- a/src/storage/blocksstable/cs_encoding/ob_micro_block_cs_decoder.cpp +++ b/src/storage/blocksstable/cs_encoding/ob_micro_block_cs_decoder.cpp @@ -1869,8 +1869,9 @@ int ObMicroBlockCSDecoder::get_rows( const int32_t col_id = cols.at(i); sql::ObExpr &expr = *(exprs.at(i)); const bool need_padding = nullptr != col_params.at(i) && col_params.at(i)->get_meta_type().is_fixed_len_char_type(); + const bool need_dispatch_collection = nullptr != col_params.at(i) && col_params.at(i)->get_meta_type().is_collection_sql_type(); if (0 == vec_offset) { - const VectorFormat format = need_padding ? VectorFormat::VEC_DISCRETE : expr.get_default_res_format(); + const VectorFormat format = (need_padding || need_dispatch_collection) ? 
VectorFormat::VEC_DISCRETE : expr.get_default_res_format(); if (OB_FAIL(storage::init_expr_vector_header(expr, eval_ctx, eval_ctx.max_batch_size_, format))) { LOG_WARN("Failed to init vector", K(ret)); } @@ -1889,6 +1890,9 @@ int ObMicroBlockCSDecoder::get_rows( expr, eval_ctx))) { LOG_WARN("Failed pad on rich format columns", K(ret), K(expr)); + } else if (need_dispatch_collection + && OB_FAIL(storage::distribute_attrs_on_rich_format_columns(row_cap, vec_offset, expr, eval_ctx))) { + LOG_WARN("failed to dispatch collection cells", K(ret), K(i), K(row_cap), K(vec_offset)); } } } diff --git a/src/storage/blocksstable/encoding/ob_micro_block_decoder.cpp b/src/storage/blocksstable/encoding/ob_micro_block_decoder.cpp index 00a6d1d10d..17cfc16340 100644 --- a/src/storage/blocksstable/encoding/ob_micro_block_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_micro_block_decoder.cpp @@ -2239,8 +2239,9 @@ int ObMicroBlockDecoder::get_rows( const int32_t col_id = cols.at(i); sql::ObExpr &expr = *(exprs.at(i)); const bool need_padding = nullptr != col_params.at(i) && col_params.at(i)->get_meta_type().is_fixed_len_char_type(); + const bool need_dispatch_collection = nullptr != col_params.at(i) && col_params.at(i)->get_meta_type().is_collection_sql_type(); if (0 == vec_offset) { - const VectorFormat format = need_padding ? VectorFormat::VEC_DISCRETE : expr.get_default_res_format(); + const VectorFormat format = (need_padding || need_dispatch_collection) ? 
VectorFormat::VEC_DISCRETE : expr.get_default_res_format(); if (OB_FAIL(storage::init_expr_vector_header(expr, eval_ctx, eval_ctx.max_batch_size_, format))) { LOG_WARN("Fail to init vector", K(ret)); } @@ -2259,6 +2260,9 @@ int ObMicroBlockDecoder::get_rows( expr, eval_ctx))) { LOG_WARN("Failed pad on rich format columns", K(ret), K(expr)); + } else if (need_dispatch_collection + && OB_FAIL(storage::distribute_attrs_on_rich_format_columns(row_cap, vec_offset, expr, eval_ctx))) { + LOG_WARN("failed to dispatch collection cells", K(ret), K(i), K(row_cap), K(vec_offset)); } } } diff --git a/src/storage/blocksstable/index_block/ob_index_block_util.h b/src/storage/blocksstable/index_block/ob_index_block_util.h index c2a807aae4..d9d365454c 100644 --- a/src/storage/blocksstable/index_block/ob_index_block_util.h +++ b/src/storage/blocksstable/index_block/ob_index_block_util.h @@ -157,7 +157,7 @@ OB_INLINE static int get_skip_index_store_upper_size( OB_INLINE static bool is_skip_index_black_list_type(const ObObjType &obj_type) { return ObNullType == obj_type || ob_is_json_tc(obj_type) || ob_is_geometry_tc(obj_type) - || ob_is_user_defined_sql_type(obj_type) || ob_is_roaringbitmap_tc(obj_type); + || ob_is_user_defined_sql_type(obj_type) || ob_is_roaringbitmap_tc(obj_type) || ob_is_collection_sql_type(obj_type); } OB_INLINE static bool is_skip_index_while_list_type(const ObObjType &obj_type) diff --git a/src/storage/blocksstable/ob_micro_block_reader.cpp b/src/storage/blocksstable/ob_micro_block_reader.cpp index f2352c08a4..d0f730da22 100644 --- a/src/storage/blocksstable/ob_micro_block_reader.cpp +++ b/src/storage/blocksstable/ob_micro_block_reader.cpp @@ -965,6 +965,7 @@ int ObMicroBlockReader::get_rows( if (OB_SUCC(ret)) { for (int64_t i = 0; OB_SUCC(ret) && i < cols_projector.count(); ++i) { const bool need_padding = nullptr != col_params.at(i) && col_params.at(i)->get_meta_type().is_fixed_len_char_type(); + const bool need_dispatch_collection = nullptr != 
col_params.at(i) && col_params.at(i)->get_meta_type().is_collection_sql_type(); if (need_padding) { if (OB_FAIL(storage::pad_on_rich_format_columns( col_params.at(i)->get_accuracy(), @@ -976,6 +977,9 @@ int ObMicroBlockReader::get_rows( eval_ctx))) { LOG_WARN("Failed pad on rich format columns", K(ret), KPC(exprs.at(i))); } + } else if (need_dispatch_collection + && OB_FAIL(storage::distribute_attrs_on_rich_format_columns(row_cap, vector_offset, *(exprs.at(i)), eval_ctx))) { + LOG_WARN("failed to dispatch collection cells", K(ret), K(i), K(row_cap), K(vector_offset)); } } } diff --git a/src/storage/blocksstable/ob_sstable_printer.cpp b/src/storage/blocksstable/ob_sstable_printer.cpp index 4ae38a0d32..b96ce4db13 100644 --- a/src/storage/blocksstable/ob_sstable_printer.cpp +++ b/src/storage/blocksstable/ob_sstable_printer.cpp @@ -96,7 +96,8 @@ static const char * OB_OBJ_TYPE_NAMES[ObMaxType] = { "ObTimestampTZType", "ObTimestampLTZType", "ObTimestampNanoType", "ObRawType", "ObIntervalYMType", "ObIntervalDSType", "ObNumberFloatType", "ObNVarchar2Type", "ObNCharType", "ObURowIDType", "ObLobType", - "ObJsonType", "ObGeometryType", "ObUserDefinedSQLType","ObDecimalIntType" + "ObJsonType", "ObGeometryType", "ObUserDefinedSQLType","ObDecimalIntType", + "ObCollectionSQLType" }; void ObSSTablePrinter::print_title(const char *title, const int64_t level) diff --git a/src/storage/ddl/ob_build_index_task.cpp b/src/storage/ddl/ob_build_index_task.cpp index 4729aa7562..1a5f7d6994 100644 --- a/src/storage/ddl/ob_build_index_task.cpp +++ b/src/storage/ddl/ob_build_index_task.cpp @@ -545,7 +545,7 @@ int ObUniqueIndexChecker::check_unique_index(ObIDag *dag) LOG_WARN("fail to get log stream", K(ret), K(ls_id_)); } else if (OB_FAIL(ObDDLUtil::ddl_get_tablet(ls_handle, tablet_id_, tablet_handle_))) { LOG_WARN("fail to get tablet", K(ret), K(tablet_id_), K(tablet_handle_)); - } else if (index_schema_->is_fts_index()) { + } else if (index_schema_->is_fts_index() || 
index_schema_->is_vec_index()) { STORAGE_LOG(INFO, "do not need to check unique for domain index", "index_id", index_schema_->get_table_id()); } else { if (OB_FAIL(ret)) { diff --git a/src/storage/ddl/ob_ddl_lock.cpp b/src/storage/ddl/ob_ddl_lock.cpp index 44e6bf4fb3..cbdf7b65d6 100644 --- a/src/storage/ddl/ob_ddl_lock.cpp +++ b/src/storage/ddl/ob_ddl_lock.cpp @@ -136,14 +136,14 @@ int ObDDLLock::lock_for_add_drop_index( int ObDDLLock::unlock_for_add_drop_index( const ObTableSchema &data_table_schema, - const ObTableSchema &index_schema, + const uint64_t index_table_id, + const bool is_global_index, const ObTableLockOwnerID lock_owner, ObMySQLTransaction &trans) { int ret = OB_SUCCESS; - const uint64_t tenant_id = index_schema.get_tenant_id(); + const uint64_t tenant_id = data_table_schema.get_tenant_id(); const uint64_t data_table_id = data_table_schema.get_table_id(); - const uint64_t index_table_id = index_schema.get_table_id(); const int64_t timeout_us = DEFAULT_TIMEOUT; ObSEArray data_tablet_ids; bool some_lock_not_exist = false; @@ -157,7 +157,7 @@ int ObDDLLock::unlock_for_add_drop_index( LOG_WARN("failed to unlock data tablet", K(ret)); } else if (OB_FAIL(ObOnlineDDLLock::unlock_table(tenant_id, data_table_id, ROW_EXCLUSIVE, lock_owner, timeout_us, trans, some_lock_not_exist))) { LOG_WARN("failed to unlock data table", K(ret)); - } else if (!index_schema.is_storage_local_index_table()) { + } else if (is_global_index) { if (OB_FAIL(ObOnlineDDLLock::unlock_table(tenant_id, index_table_id, EXCLUSIVE, lock_owner, timeout_us, trans, some_lock_not_exist))) { LOG_WARN("failed to unlock index table", K(ret)); } @@ -165,6 +165,77 @@ int ObDDLLock::unlock_for_add_drop_index( return ret; } +int ObDDLLock::lock_for_rebuild_index( + const share::schema::ObTableSchema &data_table_schema, + const uint64_t old_index_table_id, + const uint64_t new_index_table_id, + const bool is_global_index, + const transaction::tablelock::ObTableLockOwnerID lock_owner, + 
ObMySQLTransaction &trans) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = data_table_schema.get_tenant_id(); + const uint64_t data_table_id = data_table_schema.get_table_id(); + const int64_t timeout_us = DEFAULT_TIMEOUT; + ObSEArray data_tablet_ids; + ObInnerSQLConnection *iconn = nullptr; + if (data_table_schema.is_user_hidden_table()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lock for rebuild hidden table index", K(ret)); + } else if (!need_lock(data_table_schema)) { + LOG_INFO("skip ddl lock", K(data_table_id)); + } else if (OB_FAIL(data_table_schema.get_tablet_ids(data_tablet_ids))) { + LOG_WARN("failed to get data tablet ids", K(ret)); + } else if (OB_FAIL(ObOnlineDDLLock::lock_table(tenant_id, data_table_id, ROW_EXCLUSIVE, lock_owner, timeout_us, trans))) { + LOG_WARN("failed to lock data table", K(ret)); + } else if (OB_FAIL(ObOnlineDDLLock::lock_tablets(tenant_id, data_tablet_ids, ROW_EXCLUSIVE, lock_owner, timeout_us, trans))) { + LOG_WARN("failed to lock data table tablet", K(ret)); + } else if (OB_FAIL(do_table_lock(tenant_id, data_table_id, data_tablet_ids, ROW_SHARE, lock_owner, timeout_us, true/*is_lock*/, trans))) { + LOG_WARN("failed to lock data tablet", K(ret)); + } else if (is_global_index) { + if (OB_FAIL(ObOnlineDDLLock::lock_table(tenant_id, old_index_table_id, EXCLUSIVE, lock_owner, timeout_us, trans))) { + LOG_WARN("failed to lock index table", K(ret)); + } else if (OB_FAIL(ObOnlineDDLLock::lock_table(tenant_id, new_index_table_id, EXCLUSIVE, lock_owner, timeout_us, trans))) { + LOG_WARN("failed to lock index table", K(ret)); + } + } + return ret; +} + +int ObDDLLock::unlock_for_rebuild_index( + const share::schema::ObTableSchema &data_table_schema, + const uint64_t old_index_table_id, + const uint64_t new_index_table_id, + const bool is_global_index, + const transaction::tablelock::ObTableLockOwnerID lock_owner, + ObMySQLTransaction &trans) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = data_table_schema.get_tenant_id(); + 
const uint64_t data_table_id = data_table_schema.get_table_id(); + const int64_t timeout_us = DEFAULT_TIMEOUT; + ObSEArray data_tablet_ids; + bool some_lock_not_exist = false; + if (!need_lock(data_table_schema) || data_table_schema.is_user_hidden_table()) { + LOG_INFO("skip ddl lock", K(data_table_id)); + } else if (OB_FAIL(data_table_schema.get_tablet_ids(data_tablet_ids))) { + LOG_WARN("failed to get data tablet ids", K(ret)); + } else if (OB_FAIL(do_table_lock(tenant_id, data_table_id, data_tablet_ids, ROW_SHARE, lock_owner, timeout_us, false/*is_lock*/, trans))) { + LOG_WARN("failed to unlock data tablet", K(ret)); + } else if (OB_FAIL(ObOnlineDDLLock::unlock_tablets(tenant_id, data_tablet_ids, ROW_EXCLUSIVE, lock_owner, timeout_us, trans, some_lock_not_exist))) { + LOG_WARN("failed to unlock data tablet", K(ret)); + } else if (OB_FAIL(ObOnlineDDLLock::unlock_table(tenant_id, data_table_id, ROW_EXCLUSIVE, lock_owner, timeout_us, trans, some_lock_not_exist))) { + LOG_WARN("failed to unlock data table", K(ret)); + } else if (is_global_index) { + if (OB_FAIL(ObOnlineDDLLock::unlock_table(tenant_id, old_index_table_id, EXCLUSIVE, lock_owner, timeout_us, trans, some_lock_not_exist))) { + LOG_WARN("failed to unlock index table", K(ret)); + } else if (OB_FAIL(ObOnlineDDLLock::unlock_table(tenant_id, new_index_table_id, EXCLUSIVE, lock_owner, timeout_us, trans, some_lock_not_exist))) { + LOG_WARN("failed to unlock index table", K(ret)); + } + } + return ret; +} + int ObDDLLock::lock_for_add_lob_in_trans( const ObTableSchema &data_table_schema, ObMySQLTransaction &trans) diff --git a/src/storage/ddl/ob_ddl_lock.h b/src/storage/ddl/ob_ddl_lock.h index dda9e60b42..9514b4f62f 100644 --- a/src/storage/ddl/ob_ddl_lock.h +++ b/src/storage/ddl/ob_ddl_lock.h @@ -39,8 +39,23 @@ public: const transaction::tablelock::ObTableLockOwnerID lock_owner, ObMySQLTransaction &trans); static int unlock_for_add_drop_index( + const ObTableSchema &data_table_schema, + const uint64_t 
index_table_id, + const bool is_global_index, + const ObTableLockOwnerID lock_owner, + ObMySQLTransaction &trans); + static int lock_for_rebuild_index( const share::schema::ObTableSchema &data_table_schema, - const share::schema::ObTableSchema &index_schema, + const uint64_t old_index_table_id, + const uint64_t new_index_table_id, + const bool is_global_index, + const transaction::tablelock::ObTableLockOwnerID lock_owner, + ObMySQLTransaction &trans); + static int unlock_for_rebuild_index( + const share::schema::ObTableSchema &data_table_schema, + const uint64_t old_index_table_id, + const uint64_t new_index_table_id, + const bool is_global_index, const transaction::tablelock::ObTableLockOwnerID lock_owner, ObMySQLTransaction &trans); diff --git a/src/storage/ddl/ob_delete_lob_meta_row_task.cpp b/src/storage/ddl/ob_delete_lob_meta_row_task.cpp new file mode 100644 index 0000000000..69ff24d704 --- /dev/null +++ b/src/storage/ddl/ob_delete_lob_meta_row_task.cpp @@ -0,0 +1,488 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE +#include "ob_delete_lob_meta_row_task.h" +#include "lib/utility/ob_tracepoint.h" +#include "logservice/ob_log_service.h" +#include "share/ob_dml_sql_splicer.h" +#include "share/ob_ddl_checksum.h" +#include "share/ob_ddl_error_message_table_operator.h" +#include "share/ob_get_compat_mode.h" +#include "share/ob_ddl_task_executor.h" +#include "share/schema/ob_tenant_schema_service.h" +#include "share/ob_ddl_sim_point.h" +#include "share/scheduler/ob_dag_warning_history_mgr.h" +#include "storage/compaction/ob_column_checksum_calculator.h" +#include "storage/ddl/ob_ddl_redo_log_writer.h" +#include "storage/ob_i_table.h" +#include "observer/ob_server_struct.h" +#include "observer/ob_server_event_history_table_operator.h" +#include "storage/blocksstable/ob_datum_row.h" +#include "storage/ob_sstable_struct.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "storage/tx/ob_trans_service.h" +#include "storage/tx_storage/ob_access_service.h" +#include "storage/access/ob_table_scan_iterator.h" + +namespace oceanbase +{ +using namespace common; +using namespace storage; +using namespace compaction; +using namespace share; +using namespace share::schema; +using namespace sql; +using namespace observer; +using namespace name; + +namespace storage +{ + +int ObDeleteLobMetaRowParam::init(const ObDDLBuildSingleReplicaRequestArg &arg) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = arg.tenant_id_; + const int64_t table_id = arg.source_table_id_; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("ObDeleteLobMetaRowParam has been inited before", K(ret)); + } else if (OB_UNLIKELY(!arg.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arg", K(ret), K(arg)); + } else if (OB_FAIL(ObCompatModeGetter::get_table_compat_mode(tenant_id, table_id, compat_mode_))) { + LOG_WARN("failed to get compat mode", K(ret), K(arg)); + } + + if (OB_SUCC(ret)) { + is_inited_ = true; + tenant_id_ = tenant_id; + table_id_ = 
table_id; + schema_id_ = arg.dest_schema_id_; + schema_version_ = arg.schema_version_; + tablet_id_ = arg.source_tablet_id_; + dest_tablet_id_ = arg.dest_tablet_id_; + ls_id_ = arg.ls_id_; + task_id_ = arg.task_id_; + execution_id_ = arg.execution_id_; + tablet_task_id_ = arg.tablet_task_id_; + data_format_version_ = arg.data_format_version_; + snapshot_version_ = arg.snapshot_version_; + FLOG_INFO("succeed to init ObDeleteLobMetaRowParam", K(ret), KPC(this)); + } + return ret; +} +ObDeleteLobMetaRowDag::ObDeleteLobMetaRowDag() + : ObIDag(ObDagType::DAG_TYPE_DDL_DEL_LOB_META), is_inited_(false), param_() +{ +} + + +ObDeleteLobMetaRowDag::~ObDeleteLobMetaRowDag() +{ +} + +int ObDeleteLobMetaRowDag::init(const ObDDLBuildSingleReplicaRequestArg &arg) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("ObDeleteLobMetaRowDag has already been inited", K(ret)); + } else if (OB_UNLIKELY(!arg.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(arg)); + } else if (OB_FAIL(param_.init(arg))) { + LOG_WARN("fail to init dag param", K(ret)); + } else if (OB_UNLIKELY(!param_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("error unexpected", K(ret), K(param_)); + } else { + consumer_group_id_ = arg.consumer_group_id_; + is_inited_ = true; + } + return ret; +} + +int ObDeleteLobMetaRowDag::create_first_task() +{ + int ret = OB_SUCCESS; + ObDeleteLobMetaRowTask *delete_task = nullptr; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(alloc_task(delete_task))) { + LOG_WARN("allocate task failed", K(ret)); + } else if (OB_ISNULL(delete_task)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr task", K(ret)); + } else if (OB_FAIL(delete_task->init(param_))) { + LOG_WARN("init prepare task failed", K(ret)); + } else if (OB_FAIL(add_task(*delete_task))) { + LOG_WARN("add task failed", K(ret)); + } + return ret; +} + +bool 
ObDeleteLobMetaRowDag::ignore_warning() +{ + return OB_EAGAIN == dag_ret_ + || OB_NEED_RETRY == dag_ret_ + || OB_TASK_EXPIRED == dag_ret_; +} + +int64_t ObDeleteLobMetaRowDag::hash() const +{ + int tmp_ret = OB_SUCCESS; + int64_t hash_val = 0; + if (OB_UNLIKELY(!is_inited_ || !param_.is_valid())) { + tmp_ret = OB_ERR_SYS; + LOG_ERROR("table schema must not be NULL", K(tmp_ret), K(is_inited_), K(param_)); + } else { + hash_val = param_.tenant_id_ + + param_.table_id_ + + param_.schema_id_ + + param_.ls_id_.hash() + + param_.tablet_id_.hash() + + param_.dest_tablet_id_.hash() + + ObDagType::DAG_TYPE_DDL_DEL_LOB_META; + } + return hash_val; +} + +bool ObDeleteLobMetaRowDag::operator==(const ObIDag &other) const +{ + int tmp_ret = OB_SUCCESS; + bool is_equal = false; + if (OB_UNLIKELY(this == &other)) { + is_equal = true; + } else if (get_type() == other.get_type()) { + const ObDeleteLobMetaRowDag &dag = static_cast(other); + if (OB_UNLIKELY(!param_.is_valid() || !dag.param_.is_valid())) { + tmp_ret = OB_ERR_SYS; + LOG_ERROR("invalid argument", K(tmp_ret), K(param_), K(dag.param_)); + } else { + is_equal = (param_.tenant_id_ == dag.param_.tenant_id_) && (param_.tenant_id_ == dag.param_.tenant_id_) && + (param_.table_id_ == dag.param_.table_id_) && (param_.schema_id_ == dag.param_.schema_id_) && + (param_.ls_id_ == dag.param_.ls_id_) && (param_.tablet_id_ == dag.param_.tablet_id_) && + (param_.dest_tablet_id_ == dag.param_.dest_tablet_id_) && + (param_.delete_lob_meta_ret_ == dag.param_.delete_lob_meta_ret_); + } + } + return is_equal; +} + +int ObDeleteLobMetaRowDag::fill_info_param(compaction::ObIBasicInfoParam *&out_param, ObIAllocator &allocator) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDeleteLobMetaRowDag has not been initialized", K(ret)); + } else if (OB_UNLIKELY(!param_.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid param", K(ret), K(param_)); + } else if 
(OB_FAIL(ADD_DAG_WARN_INFO_PARAM(out_param, allocator, get_type(), + param_.ls_id_.id(), + static_cast(param_.table_id_), + static_cast(param_.tablet_id_.id()), + static_cast(param_.dest_tablet_id_.id()), + param_.schema_version_, + param_.snapshot_version_))) { + LOG_WARN("failed to fill info param", K(ret)); + } + return ret; +} + +int ObDeleteLobMetaRowDag::fill_dag_key(char *buf, const int64_t buf_len) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDeleteLobMetaRowDag has not been initialized", K(ret)); + } else if (OB_UNLIKELY(!param_.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid params", K(ret), K(param_)); + } else if (OB_FAIL(databuff_printf(buf, buf_len, "logstream_id=%ld tablet_id=%ld lob_meta_tablet_id=%ld", + param_.ls_id_.id(), param_.tablet_id_.id(), param_.dest_tablet_id_.id()))) { + LOG_WARN("fill dag key for ddl table merge dag failed", K(ret), K(param_)); + } + return ret; +} + +int ObDeleteLobMetaRowDag::report_replica_build_status() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObComplementDataDag has not been inited", K(ret)); + } else if (OB_UNLIKELY(!param_.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid param", K(ret), K(param_)); + } else { +#ifdef ERRSIM + if (OB_SUCC(ret)) { + ret = OB_E(EventTable::EN_DDL_REPORT_REPLICA_BUILD_STATUS_FAIL) OB_SUCCESS; + LOG_INFO("report replica build status errsim", K(ret)); + } +#endif + obrpc::ObDDLBuildSingleReplicaResponseArg arg; + ObAddr rs_addr; + arg.tenant_id_ = param_.tenant_id_; + arg.dest_tenant_id_ = param_.tenant_id_; + arg.ls_id_ = param_.ls_id_; + arg.dest_ls_id_ = param_.ls_id_; + arg.tablet_id_ = param_.tablet_id_; + arg.source_table_id_ = param_.table_id_; + arg.dest_schema_id_ = param_.schema_id_; + arg.ret_code_ = param_.delete_lob_meta_ret_; + arg.snapshot_version_ = param_.snapshot_version_; + arg.schema_version_ = param_.schema_version_; + 
arg.dest_schema_version_ = param_.schema_version_; + arg.task_id_ = param_.task_id_; + arg.execution_id_ = param_.execution_id_; + arg.server_addr_ = GCTX.self_addr(); + FLOG_INFO("send replica build status response to RS", K(ret), K(arg)); + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(GCTX.rs_rpc_proxy_) || OB_ISNULL(GCTX.rs_mgr_)) { + ret = OB_ERR_SYS; + LOG_WARN("innner system error, rootserver rpc proxy or rs mgr must not be NULL", K(ret), KP(GCTX.rs_mgr_)); + } else if (OB_FAIL(GCTX.rs_mgr_->get_master_root_server(rs_addr))) { + LOG_WARN("fail to get rootservice address", K(ret)); + } else if (OB_FAIL(GCTX.rs_rpc_proxy_->to(rs_addr).build_ddl_single_replica_response(arg))) { + LOG_WARN("fail to send build ddl single replica response", K(ret), K(arg)); + } + } + return ret; +} + +ObDeleteLobMetaRowTask::ObDeleteLobMetaRowTask() + : ObITask(TASK_TYPE_DELETE_LOB_META_ROW), is_inited_(false), param_(nullptr) +{ +} + +ObDeleteLobMetaRowTask::~ObDeleteLobMetaRowTask() +{ +} + +int ObDeleteLobMetaRowTask::init(ObDeleteLobMetaRowParam ¶m) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("ObDeleteLobMetaRowTask has already been inited", K(ret)); + } else if (OB_UNLIKELY(!param.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), K(param)); + } else { + param_ = ¶m; + is_inited_ = true; + } + return ret; +} + +int ObDeleteLobMetaRowTask::init_scan_param(ObTableScanParam& scan_param) +{ + int ret = OB_SUCCESS; + const ObTenantSchema *tenant_schema = nullptr; + const ObTableSchema *table_schema = nullptr; + ObSchemaGetterGuard schema_guard; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDeleteLobMetaRowTask has not been inited", K(ret)); + } else { + const uint64_t tenant_id = param_->tenant_id_; + const int64_t table_id = param_->table_id_; + const int64_t schema_version = param_->schema_version_; + if 
(OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard( + tenant_id, schema_guard))) { + LOG_WARN("get tenant schema failed", K(ret), K(tenant_id)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, table_id, table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(table_id)); + } else if (OB_ISNULL(table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("table not exist", K(ret), K(table_id), K(tenant_id)); + } else { + scan_param.tablet_id_ = param_->tablet_id_; + scan_param.schema_version_ = param_->schema_version_; + scan_param.is_get_ = false; + scan_param.ls_id_ = param_->ls_id_; + ObQueryFlag query_flag(ObQueryFlag::Forward, // scan_order + false, // daily_merge + false, // optimize + false, // sys scan + true, // full_row + false, // index_back + false, // query_stat + ObQueryFlag::MysqlMode, // sql_mode + false // read_latest + ); + scan_param.scan_flag_.flag_ = query_flag.flag_; + scan_param.key_ranges_.set_attr(ObMemAttr(tenant_id, "ScanParamKR")); + scan_param.ss_key_ranges_.set_attr(ObMemAttr(tenant_id, "ScanParamSSKR")); + scan_param.index_id_ = 0; + for (uint32_t i = 0; OB_SUCC(ret) && i < table_schema->get_column_count(); i++) { + const ObColumnSchemaV2 *column_schema = table_schema->get_column_schema_by_idx(i); + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, column schema is nullptr", K(ret), K(i), KPC(this)); + } else if (column_schema->get_data_type() != ObLongTextType) { + // do nothing + } else if (OB_FAIL(scan_param.column_ids_.push_back(column_schema->get_column_id()))) { + LOG_WARN("push col id failed.", K(ret), K(i)); + } else { + collation_type_ = column_schema->get_collation_type(); + } + } + if (OB_SUCC(ret)) { + scan_param.reserved_cell_count_ = scan_param.column_ids_.count(); + // table param + scan_param.index_id_ = 0; // table id + // set timeout + scan_param.timeout_ =INT64_MAX; + // scan_param.virtual_column_exprs_ + scan_param.limit_param_.limit_ 
= -1; + scan_param.limit_param_.offset_ = 0; + scan_param.sql_mode_ = SMO_DEFAULT; + // common set + scan_param.allocator_ = &(param_->allocator_); + scan_param.for_update_ = false; + scan_param.for_update_wait_timeout_ = scan_param.timeout_; + scan_param.scan_allocator_ = &(param_->allocator_); + scan_param.frozen_version_ = -1; + scan_param.force_refresh_lc_ = false; + scan_param.output_exprs_ = nullptr; + scan_param.aggregate_exprs_ = nullptr; + scan_param.op_ = nullptr; + scan_param.row2exprs_projector_ = nullptr; + scan_param.need_scn_ = false; + scan_param.pd_storage_flag_ = false; + ObTableParam *table_param = NULL; + void *buf = nullptr; + if (OB_ISNULL(buf = param_->allocator_.alloc(sizeof(ObTableParam)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("Fail to allocate memory", K(ret)); + } else { + table_param = new (buf) ObTableParam(param_->allocator_); + table_param->get_enable_lob_locator_v2() = true; + table_param->set_is_vec_index(true); + if (OB_FAIL(table_param->convert(*table_schema, scan_param.column_ids_, sql::ObStoragePushdownFlag()))) { + LOG_WARN("failed to convert table param.", K(ret)); + } else { + scan_param.table_param_ = table_param; + } + } + } // end of init snapshot + + // init scan range + if (OB_SUCC(ret)) { + ObNewRange scan_range; + scan_range.table_id_ = table_id; + scan_range.set_whole_range(); + if (OB_FAIL(scan_param.key_ranges_.push_back(scan_range))) { + LOG_WARN("failed to push back scan range", K(ret)); + } + } // end of set scan range + } + } + return ret; +} + +int ObDeleteLobMetaRowTask::process() +{ + int ret = OB_SUCCESS; + int end_trans_ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObDeleteLobMetaRowTask has not been inited", K(ret)); + } else { + ObTableScanParam scan_param; + transaction::ObTxDesc *tx_desc = nullptr; + transaction::ObTransService *txs = MTL(transaction::ObTransService*); + ObNewRowIterator *scan_iter = nullptr; + ObAccessService 
*tsc_service = MTL(ObAccessService *); + blocksstable::ObDatumRow *datum_row = nullptr; + ObTableScanIterator *table_scan_iter = nullptr; + storage::ObLobManager* lob_mngr = MTL(storage::ObLobManager*); + ObIDag *tmp_dag = get_dag(); + if (OB_ISNULL(txs) || OB_ISNULL(tsc_service) || OB_ISNULL(lob_mngr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should not be null", K(ret), KP(txs), KP(tsc_service), KP(lob_mngr)); + } else if (OB_FAIL(ObInsertLobColumnHelper::start_trans(param_->ls_id_, true/*is_for_read*/, INT64_MAX, tx_desc))) { + LOG_WARN("fail to get tx_desc", K(ret)); + } else if (OB_FAIL(txs->get_ls_read_snapshot(*tx_desc, transaction::ObTxIsolationLevel::RC, param_->ls_id_, INT64_MAX, scan_param.snapshot_))) { + LOG_WARN("fail to get snapshot", K(ret)); + } else if (OB_FAIL(init_scan_param(scan_param))) { + LOG_WARN("fail to init scan_param", K(ret)); + } else if (OB_FAIL(tsc_service->table_scan(scan_param, scan_iter))) { + if (OB_SNAPSHOT_DISCARDED == ret && scan_param.fb_snapshot_.is_valid()) { + ret = OB_INVALID_QUERY_TIMESTAMP; + } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) { + LOG_WARN("fail to scan table", K(scan_param), K(ret)); + } + } else if (OB_FALSE_IT(table_scan_iter = static_cast(scan_iter))) { + } else if (OB_ISNULL(table_scan_iter)) { + ret = OB_BAD_NULL_ERROR; + LOG_WARN("scan iter is nullptr", K(ret)); + } else { + while (OB_SUCC(ret)) { + if (OB_FAIL(table_scan_iter->get_next_row(datum_row))) { + if (OB_ITER_END != ret) { + LOG_WARN("failed to get next row from snapshot table.", K(ret)); + } + } else if (datum_row->get_column_count() != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get row column cnt invalid.", K(ret), K(datum_row->get_column_count())); + } else if (OB_FAIL(ObInsertLobColumnHelper::delete_lob_column(param_->allocator_, + param_->ls_id_, + param_->tablet_id_, + collation_type_, + datum_row->storage_datums_[0], + INT64_MAX, + true))) { + LOG_WARN("failed to delete lob column", K(ret)); + } + } + if (ret == OB_ITER_END) { + ret = 
OB_SUCCESS; + } + } + + if (nullptr != tx_desc) { + if (OB_SUCCESS != (end_trans_ret = ObInsertLobColumnHelper::end_trans(tx_desc, OB_SUCCESS != ret, INT64_MAX))) { + LOG_WARN("fail to end read trans", K(ret)); + ret = end_trans_ret; + } + } + + if (OB_NOT_NULL(tmp_dag)) { + ObDeleteLobMetaRowDag *dag = nullptr; + if (OB_ISNULL(tmp_dag) || ObDagType::DAG_TYPE_DDL_DEL_LOB_META != tmp_dag->get_type()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("dag is invalid", K(ret), KP(tmp_dag)); + } else if (FALSE_IT(dag = static_cast<ObDeleteLobMetaRowDag *>(tmp_dag))) { + } else if (OB_SUCCESS != (tmp_ret = dag->report_replica_build_status())) { + // do not override ret if it has already failed. + ret = OB_SUCCESS == ret ? tmp_ret : ret; + LOG_WARN("fail to report replica build status", K(ret), K(tmp_ret)); + } + } + + if (OB_FAIL(ret)) { + param_->delete_lob_meta_ret_ = ret; + ret = OB_SUCCESS; + } + } + return ret; +} + +} //end namespace storage +} //end namespace oceanbase diff --git a/src/storage/ddl/ob_delete_lob_meta_row_task.h b/src/storage/ddl/ob_delete_lob_meta_row_task.h new file mode 100644 index 0000000000..7a6744fe31 --- /dev/null +++ b/src/storage/ddl/ob_delete_lob_meta_row_task.h @@ -0,0 +1,138 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_STORAGE_OB_DELETE_LOB_META_ROW_TASK_H +#define OCEANBASE_STORAGE_OB_DELETE_LOB_META_ROW_TASK_H + +#include "storage/access/ob_table_access_context.h" +#include "share/scheduler/ob_tenant_dag_scheduler.h" +#include "storage/blocksstable/ob_block_sstable_struct.h" +#include "storage/compaction/ob_column_checksum_calculator.h" +#include "storage/ddl/ob_ddl_redo_log_writer.h" + + +namespace oceanbase +{ +namespace storage +{ + +struct ObDeleteLobMetaRowParam final +{ +public: + ObDeleteLobMetaRowParam(): + is_inited_(false), tenant_id_(common::OB_INVALID_TENANT_ID), + table_id_(common::OB_INVALID_ID), schema_id_(common::OB_INVALID_ID), ls_id_(share::ObLSID::INVALID_LS_ID), + tablet_id_(ObTabletID::INVALID_TABLET_ID), dest_tablet_id_(ObTabletID::INVALID_TABLET_ID), + row_store_type_(common::ENCODING_ROW_STORE), schema_version_(0), + snapshot_version_(0), task_id_(0), execution_id_(-1), tablet_task_id_(0), delete_lob_meta_ret_(common::OB_SUCCESS), + compat_mode_(lib::Worker::CompatMode::INVALID), data_format_version_(0), + allocator_("CompleteDataPar", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()) + {} + ~ObDeleteLobMetaRowParam() { destroy(); } + int init(const obrpc::ObDDLBuildSingleReplicaRequestArg &arg); + + bool is_valid() const + { + return common::OB_INVALID_TENANT_ID != tenant_id_ && ls_id_.is_valid() && common::OB_INVALID_ID != schema_id_ + && common::OB_INVALID_ID != table_id_ && tablet_id_.is_valid() && dest_tablet_id_.is_valid() + && snapshot_version_ > 0 && compat_mode_ != lib::Worker::CompatMode::INVALID + && execution_id_ >= 0 && tablet_task_id_ > 0 && data_format_version_ > 0; + } + + int get_hidden_table_key(ObITable::TableKey &table_key) const; + void destroy() + { + is_inited_ = false; + tenant_id_ = common::OB_INVALID_TENANT_ID; + ls_id_.reset(); + table_id_ = common::OB_INVALID_ID; + schema_id_ = common::OB_INVALID_ID; + tablet_id_.reset(); + dest_tablet_id_.reset(); + allocator_.reset(); + row_store_type_ = 
common::ENCODING_ROW_STORE; + schema_version_ = 0; + snapshot_version_ = 0; + task_id_ = 0; + execution_id_ = -1; + tablet_task_id_ = 0; + compat_mode_ = lib::Worker::CompatMode::INVALID; + data_format_version_ = 0; + } + TO_STRING_KV(K_(is_inited), K_(tenant_id), K_(ls_id), K_(table_id), K_(tablet_id), + K_(tablet_task_id), K_(schema_version), K_(snapshot_version), K_(task_id), + K_(execution_id), K_(compat_mode), K_(data_format_version)); +public: + bool is_inited_; + uint64_t tenant_id_; + uint64_t table_id_; + uint64_t schema_id_; + share::ObLSID ls_id_; + ObTabletID tablet_id_; + ObTabletID dest_tablet_id_; + common::ObRowStoreType row_store_type_; + int64_t schema_version_; + int64_t snapshot_version_; + int64_t task_id_; + int64_t execution_id_; + int64_t tablet_task_id_; + int delete_lob_meta_ret_; + lib::Worker::CompatMode compat_mode_; + uint64_t data_format_version_; + common::ObArenaAllocator allocator_; +}; + +class ObDeleteLobMetaRowDag final: public share::ObIDag +{ +public: + ObDeleteLobMetaRowDag(); + ~ObDeleteLobMetaRowDag(); + int init(const obrpc::ObDDLBuildSingleReplicaRequestArg &arg); + int64_t hash() const override; + bool operator==(const ObIDag& other) const override; + bool is_inited() const { return is_inited_; } + int fill_dag_key(char *buf, const int64_t buf_len) const override; + int report_replica_build_status(); + virtual lib::Worker::CompatMode get_compat_mode() const override + { return param_.compat_mode_; } + void handle_init_failed_ret_code(int ret) { param_.delete_lob_meta_ret_ = ret; } + virtual int fill_info_param(compaction::ObIBasicInfoParam *&out_param, ObIAllocator &allocator) const override; + virtual uint64_t get_consumer_group_id() const override { return consumer_group_id_; } + virtual bool is_ha_dag() const { return false; } + virtual int create_first_task() override; + virtual bool ignore_warning() override; +private: + bool is_inited_; + ObDeleteLobMetaRowParam param_; + 
DISALLOW_COPY_AND_ASSIGN(ObDeleteLobMetaRowDag); +}; + +class ObDeleteLobMetaRowTask : public share::ObITask +{ +public: + ObDeleteLobMetaRowTask(); + ~ObDeleteLobMetaRowTask(); + int init(ObDeleteLobMetaRowParam &param); + virtual int process() override; + int init_scan_param(ObTableScanParam& scan_param); + +private: + bool is_inited_; + ObDeleteLobMetaRowParam *param_; + ObCollationType collation_type_; + DISALLOW_COPY_AND_ASSIGN(ObDeleteLobMetaRowTask); +}; + + +} // end namespace storage +} // end namespace oceanbase +#endif // OCEANBASE_STORAGE_OB_DELETE_LOB_META_ROW_TASK_H diff --git a/src/storage/ddl/ob_direct_insert_sstable_ctx_new.cpp b/src/storage/ddl/ob_direct_insert_sstable_ctx_new.cpp index 664be5fd34..50832c07d2 100644 --- a/src/storage/ddl/ob_direct_insert_sstable_ctx_new.cpp +++ b/src/storage/ddl/ob_direct_insert_sstable_ctx_new.cpp @@ -1422,6 +1422,49 @@ int ObTabletDirectLoadMgr::open_sstable_slice( return ret; } +int ObTabletDirectLoadMgr::prepare_schema_item_for_vec_idx_data( + const uint64_t tenant_id, + ObSchemaGetterGuard &schema_guard, + const ObTableSchema *table_schema, + const ObTableSchema *&data_table_schema) +{ + int ret = OB_SUCCESS; + ObSEArray col_ids; + uint64_t delta_buffer_table_tid; + const ObTableSchema *delta_buffer_table_schema = nullptr; + // get data schema + if (OB_FAIL(schema_guard.get_table_schema(tenant_id, table_schema->get_data_table_id(), data_table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(tenant_id), K(table_schema->get_data_table_id())); + } else if (OB_ISNULL(data_table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("table not exist", K(ret), K(tenant_id), K(table_schema->get_data_table_id())); + } else if (OB_FAIL(ObVectorIndexUtil::get_vector_index_column_id(*data_table_schema, *table_schema, col_ids))) { + LOG_WARN("fail to get vector index id", K(ret)); + } else if (col_ids.count() != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get invalid col id array", K(ret), K(col_ids)); + } else if 
(OB_FAIL(ObVectorIndexUtil::get_vector_index_tid(&schema_guard, + *data_table_schema, + INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL, + col_ids.at(0), + delta_buffer_table_tid))) { + LOG_WARN("fail to get spec vector delta buffer table id", K(ret), K(col_ids), KPC(data_table_schema)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, delta_buffer_table_tid, delta_buffer_table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(tenant_id), K(delta_buffer_table_tid)); + } else if (OB_ISNULL(delta_buffer_table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("table not exist", K(ret), K(tenant_id), K(delta_buffer_table_tid)); + } else if (OB_FAIL(ObVectorIndexUtil::get_vector_index_column_dim(*delta_buffer_table_schema, schema_item_.vec_dim_))) { + LOG_WARN("fail to get vector col dim", K(ret)); + } else if (schema_item_.vec_dim_ == 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get vector dim is zero, fail to calc", K(ret), K(schema_item_.vec_dim_), KPC(delta_buffer_table_schema)); + } else if (OB_FAIL(ob_write_string(sqc_build_ctx_.schema_allocator_, delta_buffer_table_schema->get_index_params(), schema_item_.vec_idx_param_))) { + LOG_WARN("fail to write string", K(ret), K(delta_buffer_table_schema->get_index_params())); + } + return ret; +} + int ObTabletDirectLoadMgr::prepare_schema_item_on_demand(const uint64_t table_id, const int64_t parallel) { @@ -1443,6 +1486,8 @@ int ObTabletDirectLoadMgr::prepare_schema_item_on_demand(const uint64_t table_id ObSchemaGetterGuard schema_guard; const ObDataStoreDesc &data_desc = sqc_build_ctx_.data_block_desc_.get_desc(); const ObTableSchema *table_schema = nullptr; + const ObTableSchema *data_table_schema = nullptr; + bool is_vector_data_complement = false; if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard(tenant_id, schema_guard))) { LOG_WARN("get tenant schema failed", K(ret), K(tenant_id), K(table_id)); } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, table_id, 
table_schema))) { @@ -1454,11 +1499,21 @@ int ObTabletDirectLoadMgr::prepare_schema_item_on_demand(const uint64_t table_id LOG_WARN("prepare sstable index builder failed", K(ret), K(sqc_build_ctx_)); } else if (OB_FAIL(table_schema->get_is_column_store(schema_item_.is_column_store_))) { LOG_WARN("fail to get is column store", K(ret)); + } else if (FALSE_IT(is_vector_data_complement = table_schema->is_vec_index_snapshot_data_type())) { + } else if (is_vector_data_complement && OB_FAIL(prepare_schema_item_for_vec_idx_data(tenant_id, + schema_guard, + table_schema, + data_table_schema))) { + LOG_WARN("fail to prepare vector index data", K(ret)); + } + if (OB_FAIL(ret)) { } else { schema_item_.is_index_table_ = table_schema->is_index_table(); schema_item_.rowkey_column_num_ = table_schema->get_rowkey_column_num(); schema_item_.is_unique_index_ = table_schema->is_unique_index(); - schema_item_.lob_inrow_threshold_ = table_schema->get_lob_inrow_threshold(); + schema_item_.lob_inrow_threshold_ = is_vector_data_complement ? 
+ data_table_schema->get_lob_inrow_threshold() : + table_schema->get_lob_inrow_threshold(); if (OB_FAIL(column_items_.reserve(data_desc.get_col_desc_array().count()))) { LOG_WARN("reserve column schema array failed", K(ret), K(data_desc.get_col_desc_array().count()), K(column_items_)); @@ -1466,16 +1521,23 @@ int ObTabletDirectLoadMgr::prepare_schema_item_on_demand(const uint64_t table_id for (int64_t i = 0; OB_SUCC(ret) && i < data_desc.get_col_desc_array().count(); ++i) { const ObColDesc &col_desc = data_desc.get_col_desc_array().at(i); const schema::ObColumnSchemaV2 *column_schema = nullptr; + const schema::ObColumnSchemaV2 *data_column_schema = nullptr; ObColumnSchemaItem column_item; if (i >= table_schema->get_rowkey_column_num() && i < table_schema->get_rowkey_column_num() + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt()) { // skip multi version column, keep item invalid } else if (OB_ISNULL(column_schema = table_schema->get_column_schema(col_desc.col_id_))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("column schema is null", K(ret), K(i), K(data_desc.get_col_desc_array()), K(col_desc.col_id_)); + } else if (is_vector_data_complement && OB_ISNULL(data_column_schema = data_table_schema->get_column_schema(col_desc.col_id_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("data column schema is null", K(ret), K(i), K(data_desc.get_col_desc_array()), K(col_desc.col_id_)); } else { column_item.is_valid_ = true; column_item.col_type_ = column_schema->get_meta_type(); column_item.col_accuracy_ = column_schema->get_accuracy(); + if (is_vector_data_complement) { + column_item.column_flags_ = data_column_schema->get_column_flags(); + } } if (OB_SUCC(ret)) { if (OB_FAIL(column_items_.push_back(column_item))) { @@ -2030,8 +2092,18 @@ int ObTabletDirectLoadMgr::close_sstable_slice( LOG_WARN("invalid tablet handle", K(ret), KP(sqc_build_ctx_.storage_schema_)); } else if (!need_fill_column_group_) { if (task_finish_count >= sqc_build_ctx_.task_total_cnt_) { + if 
(schema::is_vec_index_snapshot_data_type(sqc_build_ctx_.storage_schema_->get_index_type())) { + if (OB_FAIL(slice_writer->fill_vector_index_data(sqc_build_ctx_.build_param_.common_param_.read_snapshot_, + sqc_build_ctx_.storage_schema_, + start_scn, + schema_item_.lob_inrow_threshold_, + insert_monitor))) { + LOG_WARN("fail to fill vector index data", K(ret)); + } + } // for ddl, write commit log when all slices ready. - if (OB_FAIL(close(execution_id, start_scn))) { + if (OB_FAIL(ret)) { + } else if (OB_FAIL(close(execution_id, start_scn))) { LOG_WARN("close sstable slice failed", K(ret), K(sqc_build_ctx_.build_param_)); } } diff --git a/src/storage/ddl/ob_direct_insert_sstable_ctx_new.h b/src/storage/ddl/ob_direct_insert_sstable_ctx_new.h index 0b2826150e..f5aa9fd9fd 100644 --- a/src/storage/ddl/ob_direct_insert_sstable_ctx_new.h +++ b/src/storage/ddl/ob_direct_insert_sstable_ctx_new.h @@ -430,6 +430,12 @@ public: private: int prepare_schema_item_on_demand(const uint64_t table_id, const int64_t parallel); + int prepare_schema_item_for_vec_idx_data( + const uint64_t tenant_id, + ObSchemaGetterGuard &schema_guard, + const ObTableSchema *table_schema, + const ObTableSchema *&data_table_schema); + void calc_cg_idx(const int64_t thread_cnt, const int64_t thread_id, int64_t &strat_idx, int64_t &end_idx); int fill_aggregated_column_group( const int64_t start_idx, diff --git a/src/storage/ddl/ob_direct_load_struct.cpp b/src/storage/ddl/ob_direct_load_struct.cpp index e54363c873..9a71bd5901 100644 --- a/src/storage/ddl/ob_direct_load_struct.cpp +++ b/src/storage/ddl/ob_direct_load_struct.cpp @@ -28,6 +28,9 @@ #include "sql/engine/expr/ob_expr_lob_utils.h" #include "sql/das/ob_das_utils.h" #include "sql/engine/basic/chunk_store/ob_compact_store.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "share/vector_index/ob_plugin_vector_index_adaptor.h" +#include "share/vector_index/ob_plugin_vector_index_service.h" using namespace oceanbase; using namespace 
oceanbase::common; @@ -65,7 +68,8 @@ ObDDLInsertRowIterator::ObDDLInsertRowIterator() lob_slice_id_(-1), lob_id_cache_(), context_id_(-1), - macro_seq_() + macro_seq_(), + is_skip_lob_(false) { } @@ -82,7 +86,8 @@ int ObDDLInsertRowIterator::init( const int64_t rowkey_cnt, const int64_t snapshot_version, const int64_t context_id, - const int64_t parallel_idx) + const int64_t parallel_idx, + const bool is_skip_lob) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { @@ -104,6 +109,7 @@ int ObDDLInsertRowIterator::init( context_id_ = context_id; is_next_row_cached_ = true; lob_id_cache_.set(1/*start*/, 0/*end*/); + is_skip_lob_ = is_skip_lob; is_inited_ = true; } return ret; @@ -924,7 +930,9 @@ int ObDirectLoadSliceWriter::prepare_slice_store_if_need( const int64_t dir_id, const int64_t parallelism, const ObStorageSchema *storage_schema, - const SCN &start_scn) + const SCN &start_scn, + const ObString vec_idx_param, + const int64_t vec_dim) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { @@ -932,6 +940,26 @@ int ObDirectLoadSliceWriter::prepare_slice_store_if_need( LOG_WARN("not init", K(ret)); } else if (nullptr != slice_store_) { // do nothing + } else if (is_full_direct_load(tablet_direct_load_mgr_->get_direct_load_type()) && + OB_NOT_NULL(storage_schema) && + schema::is_vec_index_snapshot_data_type(storage_schema->get_index_type())) { // TODO @lhd + ObVectorIndexSliceStore *vec_idx_slice_store = nullptr; + if (OB_ISNULL(storage_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("null schema", K(ret), K(*this)); + } else if (OB_ISNULL(vec_idx_slice_store = OB_NEWx(ObVectorIndexSliceStore, &allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory for chunk slice store failed", K(ret)); + } else if (OB_FAIL(vec_idx_slice_store->init(tablet_direct_load_mgr_, vec_idx_param, vec_dim, + tablet_direct_load_mgr_->get_column_info()))) { + LOG_WARN("init vector index slice store failed", K(ret), KPC(storage_schema)); + } else { + 
slice_store_ = vec_idx_slice_store; + } + if (OB_FAIL(ret) && nullptr != vec_idx_slice_store) { + vec_idx_slice_store->~ObVectorIndexSliceStore(); + allocator_.free(vec_idx_slice_store); + } } else if (tablet_direct_load_mgr_->need_process_cs_replica()) { writer_type_ = ObDirectLoadSliceWriterType::COL_REPLICA_WRITER; ObMultiSliceStore *multi_slice_store = nullptr; @@ -1178,7 +1206,8 @@ int ObDirectLoadSliceWriter::fill_lob_into_macro_block( } else if (OB_FAIL(check_null(false/*is_index_table*/, ObLobMetaUtil::LOB_META_SCHEMA_ROWKEY_COL_CNT, *cur_row))) { LOG_WARN("fail to check null value in row", KR(ret), KPC(cur_row)); } else if (OB_FAIL(prepare_slice_store_if_need(ObLobMetaUtil::LOB_META_SCHEMA_ROWKEY_COL_CNT, - false/*is_column_store*/, 1L/*unsued*/, 1L/*unused*/, nullptr /*storage_schema*/, start_scn))) { + false/*is_column_store*/, 1L/*unsued*/, 1L/*unused*/, nullptr /*storage_schema*/, start_scn, + ObString()/*unsued*/, 0/*unsued*/))) { LOG_WARN("prepare macro block writer failed", K(ret)); } else if (OB_FAIL(slice_store_->append_row(*cur_row))) { LOG_WARN("macro block writer append row failed", K(ret), KPC(cur_row)); @@ -1245,7 +1274,9 @@ int ObDirectLoadSliceWriter::fill_lob_meta_sstable_slice( 1L/*unsued*/, 1L/*unused*/, nullptr /*storage_schema*/, - start_scn))) { + start_scn, + ObString()/*unsued*/, + 0/*unsued*/))) { LOG_WARN("prepare macro block writer failed", K(ret)); } else if (OB_FAIL(slice_store_->append_row(*cur_row))) { LOG_WARN("macro block writer append row failed", K(ret), KPC(cur_row)); @@ -1323,7 +1354,14 @@ int ObDirectLoadSliceWriter::fill_sstable_slice( if (OB_FAIL(ret)) { } else if (OB_FAIL(check_null(schema_item.is_index_table_, schema_item.rowkey_column_num_, *cur_row))) { LOG_WARN("fail to check null value in row", KR(ret), KPC(cur_row)); - } else if (OB_FAIL(prepare_slice_store_if_need(schema_item.rowkey_column_num_, schema_item.is_column_store_, dir_id, parallelism, storage_schema, start_scn))) { + } else if 
(OB_FAIL(prepare_slice_store_if_need(schema_item.rowkey_column_num_, + schema_item.is_column_store_, + dir_id, + parallelism, + storage_schema, + start_scn, + schema_item.vec_idx_param_, + schema_item.vec_dim_))) { LOG_WARN("prepare macro block writer failed", K(ret)); } else if (OB_FAIL(slice_store_->append_row(*cur_row))) { if (is_full_direct_load_task && OB_ERR_PRIMARY_KEY_DUPLICATE == ret && schema_item.is_unique_index_) { @@ -1339,7 +1377,7 @@ int ObDirectLoadSliceWriter::fill_sstable_slice( } } if (OB_SUCC(ret)) { - LOG_DEBUG("sstable insert op append row", KPC(cur_row), KPC(cur_row)); + LOG_DEBUG("sstable insert op append row", K(tablet_id), KPC(cur_row), KPC(cur_row)); ++affected_rows; if (OB_NOT_NULL(insert_monitor)) { insert_monitor->inserted_row_cnt_ = insert_monitor->inserted_row_cnt_ + 1; @@ -1469,6 +1507,81 @@ int ObDirectLoadSliceWriter::close() return ret; } +int ObDirectLoadSliceWriter::fill_vector_index_data( + const int64_t snapshot_version, + const ObStorageSchema *storage_schema, + const SCN &start_scn, + const int64_t lob_inrow_threshold, + ObInsertMonitor* insert_monitor) +{ + int ret = OB_SUCCESS; + int end_trans_ret = OB_SUCCESS; + ObTxDesc *tx_desc = nullptr; + ObMacroBlockSliceStore *macro_block_slice_store = nullptr; + ObVectorIndexSliceStore *vec_idx_slice_store = static_cast<ObVectorIndexSliceStore *>(slice_store_); + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (OB_ISNULL(storage_schema) || snapshot_version < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(storage_schema), KP(vec_idx_slice_store), K(snapshot_version)); + } else if (OB_ISNULL(vec_idx_slice_store)) { + // do nothing + LOG_INFO("[vec index debug] maybe no data for this tablet", K(tablet_direct_load_mgr_->get_tablet_id())); + } else if (OB_FAIL(ObInsertLobColumnHelper::start_trans(tablet_direct_load_mgr_->get_ls_id(), false/*is_for_read*/, INT64_MAX - ObInsertLobColumnHelper::LOB_ACCESS_TX_TIMEOUT, tx_desc))) { + 
LOG_WARN("fail to get tx_desc", K(ret)); + } else if (OB_FAIL(vec_idx_slice_store->serialize_vector_index(&allocator_, tx_desc, lob_inrow_threshold))) { + LOG_WARN("fail to do vector index snapshot data serialize", K(ret)); + } else { + // build macro slice + if (OB_ISNULL(macro_block_slice_store = OB_NEWx(ObMacroBlockSliceStore, &allocator_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory for macro block slice store failed", K(ret)); + } else if (OB_FAIL(macro_block_slice_store->init(tablet_direct_load_mgr_, start_seq_, start_scn))) { + LOG_WARN("init macro block slice store failed", K(ret), KPC(tablet_direct_load_mgr_), K(start_seq_)); + } else { + const int64_t rk_cnt = storage_schema->get_rowkey_column_num(); + const int64_t col_cnt = storage_schema->get_column_count(); + blocksstable::ObDatumRow *datum_row = nullptr; + // do write + while (OB_SUCC(ret)) { + // build row + if (OB_FAIL(vec_idx_slice_store->get_next_vector_data_row(rk_cnt, col_cnt, snapshot_version, datum_row))) { + if (ret != OB_ITER_END) { + LOG_WARN("fail to get next vector data row", K(ret), KPC(vec_idx_slice_store)); + } + } else if (OB_FAIL(macro_block_slice_store->append_row(*datum_row))) { + LOG_WARN("fail to append row to macro block slice store", K(ret), KPC(macro_block_slice_store)); + } else { + LOG_INFO("[vec index debug] append one row into vec data tablet", K(tablet_direct_load_mgr_->get_tablet_id()), KPC(datum_row)); + if (OB_NOT_NULL(insert_monitor)) { + insert_monitor->inserted_row_cnt_ = insert_monitor->inserted_row_cnt_ + 1; + } + } + } + if (ret == OB_ITER_END) { + ret = OB_SUCCESS; + } + if (OB_SUCC(ret)) { + if (OB_FAIL(macro_block_slice_store->close())) { + LOG_WARN("fail to close macro_block_slice_store", K(ret)); + } + } + } + } + if (OB_NOT_NULL(tx_desc)) { + if (OB_SUCCESS != (end_trans_ret = ObInsertLobColumnHelper::end_trans(tx_desc, OB_SUCCESS != ret, INT64_MAX))) { + LOG_WARN("fail to end read trans", K(ret), K(end_trans_ret)); + ret = 
end_trans_ret; + } + } + if (nullptr != macro_block_slice_store) { + macro_block_slice_store->~ObMacroBlockSliceStore(); + allocator_.free(macro_block_slice_store); + } + return ret; +} + int ObDirectLoadSliceWriter::fill_column_group(const ObStorageSchema *storage_schema, const SCN &start_scn, ObInsertMonitor* insert_monitor) { int ret = OB_SUCCESS; @@ -1700,3 +1813,260 @@ int ObCOSliceWriter::close() ObInsertMonitor::~ObInsertMonitor() { } + +int ObVectorIndexSliceStore::init( + ObTabletDirectLoadMgr *tablet_direct_load_mgr, + const ObString vec_idx_param, + const int64_t vec_dim, + const ObIArray &col_array) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret), K(is_inited_)); + } else if (OB_UNLIKELY(nullptr == tablet_direct_load_mgr)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(tablet_direct_load_mgr)); + } else { + is_inited_ = true; + ctx_.ls_id_ = tablet_direct_load_mgr->get_ls_id(); + tablet_id_ = tablet_direct_load_mgr->get_tablet_id(); + vec_idx_param_ = vec_idx_param; + vec_dim_ = vec_dim; + // get data tablet id and lob tablet id + ObLSHandle ls_handle; + ObTabletHandle five_tablet_handle; + ObTabletHandle data_tablet_handle; + ObTabletBindingMdsUserData ddl_data; + if (OB_FAIL(MTL(ObLSService *)->get_ls(ctx_.ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(ctx_.ls_id_)); + } else if (OB_ISNULL(ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("ls should not be null", K(ret)); + } else if (OB_FAIL(ls_handle.get_ls()->get_tablet(tablet_id_, five_tablet_handle))) { + LOG_WARN("fail to get tablet handle", K(ret), K(tablet_id_)); + } else if (FALSE_IT(ctx_.data_tablet_id_ = five_tablet_handle.get_obj()->get_data_tablet_id())) { + } else if (OB_FAIL(ls_handle.get_ls()->get_tablet(ctx_.data_tablet_id_, data_tablet_handle))) { + LOG_WARN("fail to get tablet handle", K(ret), K(ctx_.data_tablet_id_)); + } else 
if (OB_FAIL(data_tablet_handle.get_obj()->get_ddl_data(share::SCN::max_scn(), ddl_data))) { + LOG_WARN("failed to get ddl data from tablet", K(ret), K(data_tablet_handle)); + } else { + ctx_.lob_meta_tablet_id_ = ddl_data.lob_meta_tablet_id_; + ctx_.lob_piece_tablet_id_ = ddl_data.lob_piece_tablet_id_; + } + // get vid col and vector col + for (int64_t i = 0; OB_SUCC(ret) && i < col_array.count(); i++) { + if (ObSchemaUtils::is_vec_vid_column(col_array.at(i).column_flags_)) { + vector_vid_col_idx_ = i; + } else if (ObSchemaUtils::is_vec_vector_column(col_array.at(i).column_flags_)) { + vector_col_idx_ = i; + } else if (ObSchemaUtils::is_vec_key_column(col_array.at(i).column_flags_)) { + vector_key_col_idx_ = i; + } else if (ObSchemaUtils::is_vec_data_column(col_array.at(i).column_flags_)) { + vector_data_col_idx_ = i; + } + } + if (OB_SUCC(ret)) { + if (vector_vid_col_idx_ == -1 || vector_col_idx_ == -1 || vector_key_col_idx_ == -1 || vector_data_col_idx_ == -1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get valid vector index col idx", K(ret), K(vector_col_idx_), K(vector_vid_col_idx_), + K(vector_key_col_idx_), K(vector_data_col_idx_), K(col_array)); + } + } + } + return ret; +} + +int ObVectorIndexSliceStore::append_row(const blocksstable::ObDatumRow &datum_row) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else { + // append to vector inedx adaptor + ObPluginVectorIndexService *vec_index_service = MTL(ObPluginVectorIndexService *); + ObPluginVectorIndexAdapterGuard adaptor_guard; + if (OB_ISNULL(vec_index_service)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null ObPluginVectorIndexService ptr", K(ret), K(MTL_ID())); + } else if (OB_FAIL(vec_index_service->acquire_adapter_guard(ctx_.ls_id_, + tablet_id_, + ObIndexType::INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL, + adaptor_guard, + &vec_idx_param_, + vec_dim_))) { + LOG_WARN("fail to get ObMockPluginVectorIndexAdapter", K(ret), 
K(ctx_.ls_id_), K(tablet_id_)); + } else { + // get vid and vector + ObString vec_str; + int64_t vec_vid; + if (datum_row.get_column_count() <= vector_vid_col_idx_ || datum_row.get_column_count() <= vector_col_idx_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get valid vector index col idx", K(ret), K(vector_col_idx_), K(vector_vid_col_idx_), K(datum_row)); + } else if (FALSE_IT(vec_vid = datum_row.storage_datums_[vector_vid_col_idx_].get_int())) { + } else if (FALSE_IT(vec_str = datum_row.storage_datums_[vector_col_idx_].get_string())) { + } else if (OB_FAIL(ObTextStringHelper::read_real_string_data(&tmp_allocator_, + ObLongTextType, + CS_TYPE_BINARY, + true, + vec_str))) { + LOG_WARN("fail to get real data.", K(ret), K(vec_str)); + } else if (OB_FAIL(adaptor_guard.get_adatper()->add_snap_index(reinterpret_cast(vec_str.ptr()), &vec_vid, 1))) { + LOG_WARN("fail to build index to adaptor", K(ret), KPC(this)); + } else { + LOG_INFO("[vec index debug] add into snap index success", K(tablet_id_), K(vec_vid), K(vec_str)); + } + } + } + tmp_allocator_.reuse(); + return ret; +} + +int ObVectorIndexSliceStore::close() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else { + // do nothing + } + return ret; +} + +void ObVectorIndexSliceStore::reset() +{ + is_inited_ = false; + row_cnt_ = 0; + ctx_.reset(); + tablet_id_.reset(); + vec_idx_param_.reset(); + vec_dim_ = 0; + vector_vid_col_idx_ = -1; + vector_col_idx_ = -1; + vector_key_col_idx_ = -1; + vector_data_col_idx_ = -1; + current_row_.reset(); + cur_row_pos_ = 0; + vec_allocator_.reset(); + tmp_allocator_.reset(); +} + +int ObVectorIndexSliceStore::serialize_vector_index( + ObIAllocator *allocator, + ObTxDesc *tx_desc, + int64_t lob_inrow_threshold) +{ + int ret = OB_SUCCESS; + tmp_allocator_.reuse(); + // first we do vsag serialize + ObPluginVectorIndexService *vec_index_service = MTL(ObPluginVectorIndexService *); + 
ObPluginVectorIndexAdapterGuard adaptor_guard; + if (OB_ISNULL(vec_index_service)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get null ObPluginVectorIndexService ptr", K(ret), K(MTL_ID())); + } else if (OB_FAIL(vec_index_service->acquire_adapter_guard(ctx_.ls_id_, + tablet_id_, + ObIndexType::INDEX_TYPE_VEC_INDEX_SNAPSHOT_DATA_LOCAL, + adaptor_guard, + &vec_idx_param_, + vec_dim_))) { + LOG_WARN("fail to get ObMockPluginVectorIndexAdapter", K(ret), K(ctx_.ls_id_), K(tablet_id_)); + } else { + ObHNSWSerializeCallback callback; + ObOStreamBuf::Callback cb = callback; + + ObHNSWSerializeCallback::CbParam param; + param.vctx_ = &ctx_; + param.allocator_ = allocator; + param.tmp_allocator_ = &tmp_allocator_; + param.lob_inrow_threshold_ = lob_inrow_threshold; + // build tx + oceanbase::transaction::ObTransService *txs = MTL(transaction::ObTransService*); + oceanbase::transaction::ObTxReadSnapshot snapshot; + int64_t timeout = ObTimeUtility::fast_current_time() + ObInsertLobColumnHelper::LOB_ACCESS_TX_TIMEOUT; + if (OB_ISNULL(tx_desc)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get tx desc, get nullptr", K(ret)); + } else if (OB_FAIL(txs->get_ls_read_snapshot(*tx_desc, transaction::ObTxIsolationLevel::RC, ctx_.ls_id_, timeout, snapshot))) { + LOG_WARN("fail to get snapshot", K(ret)); + } else { + param.timeout_ = timeout; + param.snapshot_ = &snapshot; + param.tx_desc_ = tx_desc; + if (OB_FAIL(adaptor_guard.get_adatper()->serialize(allocator, param, cb))) { + LOG_WARN("fail to do vsag serialize", K(ret)); + } else { + LOG_INFO("finish vsag serialize for tablet", K(tablet_id_), K(ctx_.get_vals().count())); + } + } + } + tmp_allocator_.reuse(); + return ret; +} + +bool ObVectorIndexSliceStore::is_vec_idx_col_invalid(const int64_t column_cnt) const +{ + return vector_key_col_idx_ < 0 || vector_key_col_idx_ >= column_cnt || + vector_data_col_idx_ < 0 || vector_data_col_idx_ >= column_cnt || + vector_vid_col_idx_ < 0 || vector_vid_col_idx_ >= column_cnt || + 
vector_col_idx_ < 0 || vector_col_idx_ >= column_cnt; +} + +int ObVectorIndexSliceStore::get_next_vector_data_row( + const int64_t rowkey_cnt, + const int64_t column_cnt, + const int64_t snapshot_version, + blocksstable::ObDatumRow *&datum_row) +{ + int ret = OB_SUCCESS; + const int64_t extra_rowkey_cnt = storage::ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); + const int64_t request_cnt = column_cnt + extra_rowkey_cnt; + if (current_row_.get_column_count() <= 0 + && OB_FAIL(current_row_.init(vec_allocator_, request_cnt))) { + LOG_WARN("init datum row failed", K(ret), K(request_cnt)); + } else if (OB_UNLIKELY(current_row_.get_column_count() != request_cnt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected err", K(ret), K(request_cnt), "datum_row_cnt", current_row_.get_column_count()); + } else if (cur_row_pos_ >= ctx_.vals_.count()) { + ret = OB_ITER_END; + } else if (is_vec_idx_col_invalid(current_row_.get_column_count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, vec col idx error", K(ret), K(vector_key_col_idx_), K(vector_data_col_idx_), + K(vector_vid_col_idx_), K(vector_col_idx_)); + } else { + // set vec key + int64_t key_pos = 0; + char *key_str = static_cast<char *>(vec_allocator_.alloc(OB_VEC_IDX_SNAPSHOT_KEY_LENGTH)); + if (OB_ISNULL(key_str)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc vec key", K(ret)); + } else if (OB_FAIL(databuff_printf(key_str, OB_VEC_IDX_SNAPSHOT_KEY_LENGTH, key_pos, "%lu_hnsw_data_part%05ld", tablet_id_.id(), cur_row_pos_))) { + LOG_WARN("fail to build vec snapshot key str", K(ret)); + } else { + current_row_.storage_datums_[vector_key_col_idx_].set_string(key_str, key_pos); + } + // set vec data + if (OB_FAIL(ret)) { + } else { + // TODO @lhd maybe we should do deep copy + current_row_.storage_datums_[vector_data_col_idx_].set_string(ctx_.vals_.at(cur_row_pos_)); + } + // set vid and vec to null + if (OB_SUCC(ret)) { + current_row_.storage_datums_[vector_vid_col_idx_].set_null(); + 
current_row_.storage_datums_[vector_col_idx_].set_null(); + } + if (OB_SUCC(ret)) { + // add extra rowkey + // TODO how to get snapshot + current_row_.storage_datums_[rowkey_cnt].set_int(-snapshot_version); + current_row_.storage_datums_[rowkey_cnt + 1].set_int(0); + current_row_.row_flag_.set_flag(ObDmlFlag::DF_INSERT); + datum_row = ¤t_row_; + cur_row_pos_++; + } + } + return ret; +} diff --git a/src/storage/ddl/ob_direct_load_struct.h b/src/storage/ddl/ob_direct_load_struct.h index f00e1f6ae2..329988765f 100644 --- a/src/storage/ddl/ob_direct_load_struct.h +++ b/src/storage/ddl/ob_direct_load_struct.h @@ -20,6 +20,7 @@ #include "share/scn.h" #include "share/ob_tablet_autoincrement_param.h" #include "share/scheduler/ob_tenant_dag_scheduler.h" +#include "share/vector_index/ob_vector_index_util.h" #include "share/ob_ddl_common.h" #include "sql/engine/basic/ob_chunk_datum_store.h" #include "sql/engine/basic/chunk_store/ob_compact_store.h" @@ -133,7 +134,8 @@ struct ObTableSchemaItem final public: ObTableSchemaItem() : is_column_store_(false), is_index_table_(false), is_unique_index_(false), rowkey_column_num_(0), - compress_type_(NONE_COMPRESSOR), lob_inrow_threshold_(OB_DEFAULT_LOB_INROW_THRESHOLD) + compress_type_(NONE_COMPRESSOR), lob_inrow_threshold_(OB_DEFAULT_LOB_INROW_THRESHOLD), + vec_idx_param_(), vec_dim_(0) {} ~ObTableSchemaItem() { reset(); } void reset() @@ -144,9 +146,11 @@ public: rowkey_column_num_ = 0; compress_type_ = NONE_COMPRESSOR; lob_inrow_threshold_ = OB_DEFAULT_LOB_INROW_THRESHOLD; + vec_idx_param_.reset(); + vec_dim_ = 0; } TO_STRING_KV(K(is_column_store_), K(is_index_table_), K(is_unique_index_), K(rowkey_column_num_), - K(compress_type_), K_(lob_inrow_threshold)); + K(compress_type_), K_(lob_inrow_threshold), K_(vec_idx_param), K_(vec_dim)); public: bool is_column_store_; @@ -155,13 +159,15 @@ public: int64_t rowkey_column_num_; common::ObCompressorType compress_type_; int64_t lob_inrow_threshold_; + ObString vec_idx_param_; + int64_t 
vec_dim_; }; struct ObColumnSchemaItem final { public: ObColumnSchemaItem() - : is_valid_(false), col_type_(), col_accuracy_() + : is_valid_(false), col_type_(), col_accuracy_(), column_flags_(0) {} ObColumnSchemaItem(const ObColumnSchemaItem &other) { @@ -173,12 +179,14 @@ public: is_valid_ = false; col_type_.reset(); col_accuracy_.reset(); + column_flags_ = 0; } ObColumnSchemaItem &operator=(const ObColumnSchemaItem &other) { is_valid_ = other.is_valid_; col_type_ = other.col_type_; col_accuracy_ = other.col_accuracy_; + column_flags_ = other.column_flags_; return *this; } int assign(const ObColumnSchemaItem &other) @@ -186,13 +194,15 @@ public: is_valid_ = other.is_valid_; col_type_ = other.col_type_; col_accuracy_ = other.col_accuracy_; + column_flags_ = other.column_flags_; return OB_SUCCESS; } - TO_STRING_KV(K(is_valid_), K(col_type_), K(col_accuracy_)); + TO_STRING_KV(K(is_valid_), K(col_type_), K(col_accuracy_), K(column_flags_)); public: bool is_valid_; common::ObObjMeta col_type_; ObAccuracy col_accuracy_; + int64_t column_flags_; }; // usued in replay replay and runtime execution @@ -281,15 +291,16 @@ public: const int64_t rowkey_cnt, const int64_t snapshot_version, const int64_t context_id, - const int64_t parallel_idx); + const int64_t parallel_idx, + const bool is_skip_lob = false); virtual int get_next_row(const blocksstable::ObDatumRow *&row) override { - const bool skip_lob = false; - return get_next_row(skip_lob, row); + // const bool skip_lob = false; + return get_next_row(is_skip_lob_, row); } int get_next_row(const bool skip_lob, const blocksstable::ObDatumRow *&row); TO_STRING_KV(K_(is_inited), K_(ls_id), K_(current_tablet_id), K_(current_row), K_(is_slice_empty), K_(is_next_row_cached), K_(rowkey_count), K_(snapshot_version), - K_(lob_slice_id), K_(lob_id_cache), K_(context_id), K_(macro_seq)); + K_(lob_slice_id), K_(lob_id_cache), K_(context_id), K_(macro_seq), K_(is_skip_lob)); public: int switch_to_new_lob_slice(); int 
close_lob_sstable_slice(); @@ -311,6 +322,7 @@ private: share::ObTabletCacheInterval lob_id_cache_; int64_t context_id_; blocksstable::ObMacroDataSeq macro_seq_; + bool is_skip_lob_; }; class ObLobMetaRowIterator : public ObIStoreRowIterator @@ -419,6 +431,57 @@ public: DECLARE_PURE_VIRTUAL_TO_STRING; }; +class ObVectorIndexSliceStore : public ObTabletSliceStore +{ +public: + ObVectorIndexSliceStore() + : is_inited_(false), vec_allocator_("VecIdxSS", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), + tmp_allocator_("VecIdxSSAR", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()), + row_cnt_(0), ctx_(), tablet_id_(), + vec_idx_param_(), vec_dim_(0), vector_vid_col_idx_(-1), + vector_col_idx_(-1), current_row_(), cur_row_pos_(0) + {} + virtual ~ObVectorIndexSliceStore() { reset(); } + int init(ObTabletDirectLoadMgr *tablet_direct_load_mgr, + const ObString vec_idx_param, + const int64_t vec_dim, + const ObIArray &col_array); + virtual int append_row(const blocksstable::ObDatumRow &datum_row) override; + virtual int close() override; + virtual int64_t get_row_count() const { return row_cnt_; } + void reset(); + // vector index functions + int serialize_vector_index( + ObIAllocator *allocator, + transaction::ObTxDesc *tx_desc, + int64_t lob_inrow_threshold); + int get_next_vector_data_row( + const int64_t rowkey_cnt, + const int64_t column_cnt, + const int64_t snapshot_version, + blocksstable::ObDatumRow *&datum_row); + TO_STRING_KV(K(is_inited_), K(row_cnt_), K(ctx_), K(tablet_id_), K(vec_idx_param_), K(vec_dim_), + K(vector_vid_col_idx_), K(vector_col_idx_), K(vector_key_col_idx_), K(vector_data_col_idx_)); +private: + static const int64_t OB_VEC_IDX_SNAPSHOT_KEY_LENGTH = 256; + bool is_vec_idx_col_invalid(const int64_t column_cnt) const; +public: + bool is_inited_; + ObArenaAllocator vec_allocator_; + ObArenaAllocator tmp_allocator_; + int64_t row_cnt_; + ObVecIdxSnapshotDataWriteCtx ctx_; + ObTabletID tablet_id_; + ObString vec_idx_param_; + int64_t vec_dim_; + int32_t 
vector_vid_col_idx_; + int32_t vector_col_idx_; + int32_t vector_key_col_idx_; + int32_t vector_data_col_idx_; + blocksstable::ObDatumRow current_row_; + int64_t cur_row_pos_; +}; + class ObChunkSliceStore : public ObTabletSliceStore { public: @@ -579,6 +642,12 @@ public: const int64_t cg_idx, ObCOSliceWriter *cur_writer, ObIArray &datum_stores); + int fill_vector_index_data( + const int64_t snapshot_version, + const ObStorageSchema *storage_schema, + const SCN &start_scn, + const int64_t lob_inrow_threshold, + ObInsertMonitor* insert_monitor); void set_row_offset(const int64_t row_offset) { row_offset_ = row_offset; } int64_t get_row_count() const { return nullptr == slice_store_ ? 0 : slice_store_->get_row_count(); } int64_t get_row_offset() const { return row_offset_; } @@ -622,7 +691,9 @@ private: const int64_t dir_id, const int64_t parallelism, const ObStorageSchema *storage_schema, - const share::SCN &start_scn); + const share::SCN &start_scn, + const ObString vec_idx_param, + const int64_t vec_dim); int report_unique_key_dumplicated( const int ret_code, const uint64_t table_id, diff --git a/src/storage/lob/ob_lob_locator.cpp b/src/storage/lob/ob_lob_locator.cpp index 8a6912b0cf..e115e4d215 100644 --- a/src/storage/lob/ob_lob_locator.cpp +++ b/src/storage/lob/ob_lob_locator.cpp @@ -20,6 +20,8 @@ #include "storage/tx/ob_trans_service.h" #include "share/ob_lob_access_utils.h" #include "observer/ob_server.h" +#include "storage/tx_storage/ob_ls_service.h" + namespace oceanbase { @@ -40,7 +42,8 @@ ObLobLocatorHelper::ObLobLocatorHelper() rowkey_str_(), enable_locator_v2_(), is_inited_(false), - scan_flag_() + scan_flag_(), + is_access_index_(false) { } @@ -62,6 +65,7 @@ void ObLobLocatorHelper::reset() enable_locator_v2_ = false; is_inited_ = false; scan_flag_.reset(); + is_access_index_ = false; } int ObLobLocatorHelper::init(const ObTableScanParam &scan_param, @@ -100,6 +104,7 @@ int ObLobLocatorHelper::init(const ObTableScanParam &scan_param, read_snapshot_ = 
ctx.mvcc_acc_ctx_.snapshot_; enable_locator_v2_ = table_param.enable_lob_locator_v2(); scan_flag_ = scan_param.scan_flag_; + is_access_index_ = table_param.is_vec_index(); if (snapshot_version != read_snapshot_.version_.get_val_for_tx()) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "snapshot version mismatch", @@ -137,6 +142,7 @@ int ObLobLocatorHelper::init(const uint64_t table_id, enable_locator_v2_ = true; // must be called en locator v2 enabled OB_ASSERT(ob_enable_lob_locator_v2() == true); is_inited_ = true; + is_access_index_ = false; // OB_ASSERT(snapshot_version == ctx.mvcc_acc_ctx_.snapshot_.version_); // snapshot_version mismatch in test_multi_version_sstable_single_get } @@ -576,8 +582,26 @@ int ObLobLocatorHelper::build_lob_locatorv2(ObLobLocatorV2 &locator, if (retry_info.read_latest_) { tx_info.snapshot_seq_ = ObSequence::get_max_seq_no(); } - ObMemLobLocationInfo location_info(tablet_id_, ls_id_, cs_type); - if (has_extern && OB_FAIL(locator.set_table_info(table_id_, column_id))) { // should be column idx + // if scan with index, get data tablet id + common::ObTabletID target_tablet_id(tablet_id_); + if (is_access_index_) { + share::ObLSID tmp_ls_id(ls_id_); + ObLSHandle ls_handle; + ObTabletHandle tablet_handle; + if (OB_FAIL(MTL(ObLSService *)->get_ls(tmp_ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(ls_id_)); + } else if (OB_ISNULL(ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("ls should not be null", K(ret)); + } else if (OB_FAIL(ls_handle.get_ls()->get_tablet(target_tablet_id, tablet_handle))) { + LOG_WARN("fail to get tablet handle", K(ret), K(target_tablet_id)); + } else { + target_tablet_id = tablet_handle.get_obj()->get_data_tablet_id(); + } + } + ObMemLobLocationInfo location_info(target_tablet_id.id(), ls_id_, cs_type); + if (OB_FAIL(ret)) { + } else if (has_extern && OB_FAIL(locator.set_table_info(table_id_, column_id))) { // should be column idx STORAGE_LOG(WARN, "Lob: set 
table info failed", K(ret), K(table_id_), K(column_id)); } else if (extern_flags.has_tx_info_ && OB_FAIL(locator.set_tx_info(tx_info))) { STORAGE_LOG(WARN, "Lob: set transaction info failed", K(ret), K(tx_info)); diff --git a/src/storage/lob/ob_lob_locator.h b/src/storage/lob/ob_lob_locator.h index 0f55dc69f4..18a363e509 100644 --- a/src/storage/lob/ob_lob_locator.h +++ b/src/storage/lob/ob_lob_locator.h @@ -59,7 +59,7 @@ public: OB_INLINE bool is_valid() const { return is_inited_; } OB_INLINE bool enable_lob_locator_v2() const { return enable_locator_v2_; } TO_STRING_KV(K_(table_id), K_(ls_id), K_(snapshot_version), K_(rowid_version), - KPC(rowid_project_), K_(enable_locator_v2), K_(is_inited), K_(scan_flag)); + KPC(rowid_project_), K_(enable_locator_v2), K_(is_inited), K_(scan_flag), K_(is_access_index)); private: static const int64_t DEFAULT_LOCATOR_OBJ_ARRAY_SIZE = 8; static const int64_t LOB_FORCE_INROW_SIZE = 64 * 1024L; // 64K @@ -96,6 +96,7 @@ private: bool enable_locator_v2_; bool is_inited_; ObQueryFlag scan_flag_; + bool is_access_index_; }; } // namespace storage diff --git a/src/storage/lob/ob_lob_util.cpp b/src/storage/lob/ob_lob_util.cpp index f1c6769df1..9e9e682aff 100644 --- a/src/storage/lob/ob_lob_util.cpp +++ b/src/storage/lob/ob_lob_util.cpp @@ -369,6 +369,62 @@ int ObInsertLobColumnHelper::insert_lob_column(ObIAllocator &allocator, return ret; } +int ObInsertLobColumnHelper::delete_lob_column(ObIAllocator &allocator, + const share::ObLSID ls_id, + const common::ObTabletID tablet_id, + const ObCollationType& collation_type, + blocksstable::ObStorageDatum &datum, + const int64_t timeout_ts, + const bool has_lob_header) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + + ObTxDesc *tx_desc = nullptr; + ObLobManager *lob_mngr = MTL(ObLobManager*); + ObTransService *txs = MTL(transaction::ObTransService*); + ObTxReadSnapshot snapshot; + if (OB_ISNULL(lob_mngr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get lob manager handle.", 
K(ret)); + } else { + ObString data = datum.get_string(); + // datum with null ptr and zero len should treat as no lob header + bool set_has_lob_header = has_lob_header && data.length() > 0; + ObLobLocatorV2 lob(data, set_has_lob_header); + int64_t byte_len = 0; + if (lob.has_inrow_data()) { + // delete inrow lob no need to use the lob manager + } else { + if (OB_FAIL(start_trans(ls_id, false/*is_for_read*/, timeout_ts, tx_desc))) { + LOG_WARN("fail to get tx_desc", K(ret)); + } else if (OB_FAIL(txs->get_ls_read_snapshot(*tx_desc, transaction::ObTxIsolationLevel::RC, ls_id, timeout_ts, snapshot))) { + LOG_WARN("fail to get snapshot", K(ret)); + } else { + // 4.0 text tc compatiable + ObLobAccessParam lob_param; + lob_param.tx_desc_ = tx_desc; + lob_param.tablet_id_ = tablet_id; + if (!lob.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid src lob locator.", K(ret)); + } else if (OB_FAIL(lob_mngr->build_lob_param(lob_param, allocator, collation_type, 0, UINT64_MAX, timeout_ts, lob))) { + LOG_WARN("fail to build lob param.", K(ret)); + } else if (OB_FAIL(lob_mngr->erase(lob_param))) { + LOG_WARN("lob meta row delete failed.", K(ret)); + } else { + datum.set_lob_data(*lob_param.lob_common_, lob_param.handle_size_); + } + } + if (OB_SUCCESS != (tmp_ret = end_trans(tx_desc, OB_SUCCESS != ret, timeout_ts))) { + ret = tmp_ret; + LOG_WARN("fail to end trans", K(ret), KPC(tx_desc)); + } + } + } + return ret; +} + OB_DEF_SERIALIZE_SIZE(ObLobChunkIndex) { int64_t len = 0; diff --git a/src/storage/lob/ob_lob_util.h b/src/storage/lob/ob_lob_util.h index d5cbf090f7..3ba503a810 100644 --- a/src/storage/lob/ob_lob_util.h +++ b/src/storage/lob/ob_lob_util.h @@ -290,6 +290,13 @@ public: const int64_t timeout_ts, const bool has_lob_header, const uint64_t src_tenant_id); + static int delete_lob_column(ObIAllocator &allocator, + const share::ObLSID ls_id, + const common::ObTabletID tablet_id, + const ObCollationType& collation_type, + blocksstable::ObStorageDatum &datum, + 
const int64_t timeout_ts, + const bool has_lob_header); static int insert_lob_column(ObIAllocator &allocator, const share::ObLSID ls_id, const common::ObTabletID tablet_id, diff --git a/src/storage/ls/ob_ls.cpp b/src/storage/ls/ob_ls.cpp index 9fddf641fe..9d3e5d78e9 100755 --- a/src/storage/ls/ob_ls.cpp +++ b/src/storage/ls/ob_ls.cpp @@ -1048,6 +1048,8 @@ int ObLS::register_user_service() LOG_WARN("fail to init tablet ttl manager", KR(ret)); } else { REGISTER_TO_LOGSERVICE(TTL_LOG_BASE_TYPE, &tablet_ttl_mgr_); + // reuse ttl timer + REGISTER_TO_LOGSERVICE(VEC_INDEX_LOG_BASE_TYPE, &tablet_ttl_mgr_.get_vector_idx_scheduler()); } } } @@ -1172,6 +1174,7 @@ void ObLS::unregister_user_service_() UNREGISTER_FROM_LOGSERVICE(TABLE_LOAD_RESOURCE_SERVICE_LOG_BASE_TYPE, MTL(observer::ObTableLoadResourceService *)); } if (ls_meta_.ls_id_.is_user_ls()) { + UNREGISTER_FROM_LOGSERVICE(VEC_INDEX_LOG_BASE_TYPE, &tablet_ttl_mgr_.get_vector_idx_scheduler()); UNREGISTER_FROM_LOGSERVICE(TTL_LOG_BASE_TYPE, tablet_ttl_mgr_); tablet_ttl_mgr_.destroy(); } diff --git a/src/storage/ls/ob_ls_tablet_service.cpp b/src/storage/ls/ob_ls_tablet_service.cpp index 46f9083b84..9db049d9bb 100644 --- a/src/storage/ls/ob_ls_tablet_service.cpp +++ b/src/storage/ls/ob_ls_tablet_service.cpp @@ -83,6 +83,8 @@ #include "storage/compaction/ob_tablet_merge_ctx.h" #include "storage/tablet/ob_tablet_mds_table_mini_merger.h" #include "storage/ddl/ob_tablet_ddl_kv.h" +#include "share/vector_index/ob_plugin_vector_index_adaptor.h" +#include "share/vector_index/ob_plugin_vector_index_service.h" using namespace oceanbase::share; using namespace oceanbase::common; @@ -3905,6 +3907,8 @@ int ObLSTabletService::insert_rows_to_tablet( } else { LOG_WARN("fail to check duplicate", K(ret)); } + } else if (OB_FAIL(insert_vector_index_rows(tablet_handle, run_ctx, rows, row_count))) { + LOG_WARN("failed to process vector index rows", K(ret)); } else if (OB_FAIL(insert_lob_tablet_rows(tablet_handle, run_ctx, rows, row_count))) { 
LOG_WARN("failed to insert rows to lob tablet", K(ret)); } else if (OB_FAIL(insert_tablet_rows(row_count, tablet_handle, run_ctx, rows, rows_info))) { @@ -4141,6 +4145,64 @@ int ObLSTabletService::insert_lob_tablet_rows( return ret; } +int ObLSTabletService::insert_vector_index_rows( + ObTabletHandle &data_tablet, + ObDMLRunningCtx &run_ctx, + blocksstable::ObDatumRow *rows, + int64_t row_count) +{ + int ret = OB_SUCCESS; + if (run_ctx.dml_param_.table_param_->get_data_table().is_vector_delta_buffer()) { + ObString vec_idx_param = run_ctx.dml_param_.table_param_->get_data_table().get_vec_index_param(); + int64_t vec_dim = run_ctx.dml_param_.table_param_->get_data_table().get_vec_dim(); + const uint64_t vec_id_col_id = run_ctx.dml_param_.table_param_->get_data_table().get_vec_id_col_id(); + const uint64_t vec_vector_col_id = run_ctx.dml_param_.table_param_->get_data_table().get_vec_vector_col_id(); + const uint64_t vec_type_col_id = vec_vector_col_id - 1; + LOG_DEBUG("[vec index debug] show vector index params", K(vec_idx_param), K(vec_dim), + K(vec_id_col_id), K(vec_type_col_id), K(vec_vector_col_id)); + // get vector col idx + int64_t vec_id_idx = OB_INVALID_INDEX; + int64_t type_idx = OB_INVALID_INDEX; + int64_t vector_idx = OB_INVALID_INDEX; + for (int64_t i = 0; i < run_ctx.dml_param_.table_param_->get_col_descs().count(); i++) { + uint64_t col_id = run_ctx.dml_param_.table_param_->get_col_descs().at(i).col_id_; + if (col_id == vec_id_col_id) { + vec_id_idx = i; + } else if (col_id == vec_type_col_id) { + type_idx = i; + } else if (col_id == vec_vector_col_id) { + vector_idx = i; + } + } + if (OB_UNLIKELY(vec_id_idx == OB_INVALID_INDEX || type_idx == OB_INVALID_INDEX || vector_idx == OB_INVALID_INDEX)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get vec index column idxs", K(ret), K(vec_id_col_id), K(vec_type_col_id), K(vec_vector_col_id), + K(vec_id_idx), K(type_idx), K(vector_idx)); + } else { + ObPluginVectorIndexService *vec_index_service = 
MTL(ObPluginVectorIndexService *); + ObPluginVectorIndexAdapterGuard adaptor_guard; + if (OB_FAIL(vec_index_service->acquire_adapter_guard(run_ctx.store_ctx_.ls_id_, + run_ctx.relative_table_.get_tablet_id(), + ObIndexType::INDEX_TYPE_VEC_DELTA_BUFFER_LOCAL, + adaptor_guard, + &vec_idx_param, + vec_dim))) { + LOG_WARN("fail to get ObMockPluginVectorIndexAdapter", K(ret), K(run_ctx.store_ctx_), K(run_ctx.relative_table_)); + } else if (OB_FAIL(adaptor_guard.get_adatper()->insert_rows(rows, vec_id_idx, type_idx, vector_idx, row_count))) { + LOG_WARN("fail to insert vector to adaptor", K(ret), KP(rows), K(row_count)); + } else { + for (int64_t k = 0; OB_SUCC(ret) && k < row_count; k++) { + // process for each row or call batch + LOG_DEBUG("show all vector del buffer row for insert", K(rows[k].storage_datums_)); + // set vector null for not to storage + rows[k].storage_datums_[vector_idx].set_null(); + } + } + } + } + return ret; +} + int ObLSTabletService::extract_rowkey( const ObRelativeTable &table, const blocksstable::ObDatumRowkey &rowkey, @@ -4594,6 +4656,8 @@ int ObLSTabletService::update_row_to_tablet( LOG_WARN("fail to process old row", K(ret), K(col_descs), K(old_datum_row), K(data_tbl_rowkey_change)); } + } else if (OB_FAIL(insert_vector_index_rows(tablet_handle, run_ctx, &new_datum_row, 1))) { + LOG_WARN("failed to process vector index insert", K(ret), K(new_datum_row)); } else if (OB_FAIL(process_lob_row(tablet_handle, run_ctx, update_idx, @@ -5138,6 +5202,8 @@ int ObLSTabletService::insert_row_to_tablet( } else if (GCONF.enable_defensive_check() && OB_FAIL(check_new_row_legitimacy(run_ctx, datum_row))) { LOG_WARN("check new row legitimacy failed", K(ret), K(datum_row)); + } else if (OB_FAIL(insert_vector_index_rows(tablet_handle, run_ctx, &datum_row, 1))) { + LOG_WARN("failed to process vector index rows", K(ret)); } else if (OB_FAIL(insert_lob_tablet_row(tablet_handle, run_ctx, datum_row))) { LOG_WARN("failed to write lob tablets rows", K(ret)); } else { 
diff --git a/src/storage/ls/ob_ls_tablet_service.h b/src/storage/ls/ob_ls_tablet_service.h index 3bbc572c89..54160774d0 100644 --- a/src/storage/ls/ob_ls_tablet_service.h +++ b/src/storage/ls/ob_ls_tablet_service.h @@ -606,6 +606,11 @@ private: ObDMLRunningCtx &run_ctx, blocksstable::ObDatumRow *rows, int64_t row_count); + static int insert_vector_index_rows( + ObTabletHandle &data_tablet, + ObDMLRunningCtx &run_ctx, + blocksstable::ObDatumRow *rows, + int64_t row_count); static int extract_rowkey( const ObRelativeTable &table, const common::ObStoreRowkey &rowkey, diff --git a/src/storage/ob_storage_schema.h b/src/storage/ob_storage_schema.h index 52b0692801..4b459976e9 100644 --- a/src/storage/ob_storage_schema.h +++ b/src/storage/ob_storage_schema.h @@ -233,6 +233,7 @@ public: inline bool is_materialized_view() const { return share::schema::ObTableSchema::is_materialized_view(table_type_); } inline bool is_mlog_table() const { return share::schema::ObTableSchema::is_mlog_table(table_type_); } inline bool is_fts_index() const { return share::schema::is_fts_index(index_type_); } + inline bool is_vec_index() const { return share::schema::is_vec_index(index_type_); } inline bool is_user_data_table() const { return share::schema::ObTableSchema::is_user_data_table(table_type_); } virtual inline bool is_global_index_table() const override { return share::schema::ObSimpleTableSchemaV2::is_global_index_table(index_type_); } virtual inline int64_t get_block_size() const override { return block_size_; } diff --git a/src/storage/ob_storage_util.cpp b/src/storage/ob_storage_util.cpp index f966b2f1b3..8a354f18ff 100644 --- a/src/storage/ob_storage_util.cpp +++ b/src/storage/ob_storage_util.cpp @@ -17,6 +17,7 @@ #include "share/vector/ob_discrete_format.h" #include "sql/engine/basic/ob_pushdown_filter.h" #include "sql/engine/ob_exec_context.h" +#include "sql/engine/expr/ob_array_expr_utils.h" #include "storage/blocksstable/ob_datum_row.h" namespace oceanbase @@ -334,6 +335,12 
@@ int pad_on_rich_format_columns(const common::ObAccuracy accuracy, return ret; } +int distribute_attrs_on_rich_format_columns(const int64_t row_count, const int64_t vec_offset, + sql::ObExpr &expr, sql::ObEvalCtx &eval_ctx) +{ + return ObArrayExprUtils::batch_dispatch_array_attrs(eval_ctx, expr, vec_offset, row_count); +} + int cast_obj(const common::ObObjMeta &src_meta, common::ObIAllocator &cast_allocator, common::ObObj &obj) @@ -430,7 +437,11 @@ int init_exprs_new_format_header( int ret = OB_SUCCESS; for (int64_t i = 0; OB_SUCC(ret) && i < cols_projector.count(); ++i) { sql::ObExpr *expr = exprs.at(i); - if (OB_FAIL(expr->init_vector_default(eval_ctx, eval_ctx.max_batch_size_))) { + if (expr->is_nested_expr()) { + if (OB_FAIL(expr->init_vector(eval_ctx, VEC_DISCRETE, eval_ctx.max_batch_size_))) { + STORAGE_LOG(WARN, "Failed to init vector", K(ret), K(i), KPC(exprs.at(i))); + } + } else if (OB_FAIL(expr->init_vector_default(eval_ctx, eval_ctx.max_batch_size_))) { STORAGE_LOG(WARN, "Failed to init vector", K(ret), K(i), KPC(exprs.at(i))); } } diff --git a/src/storage/ob_storage_util.h b/src/storage/ob_storage_util.h index 4de56f22a5..324a7364ba 100644 --- a/src/storage/ob_storage_util.h +++ b/src/storage/ob_storage_util.h @@ -122,6 +122,9 @@ OB_INLINE int init_exprs_uniform_header( return ret; } +int distribute_attrs_on_rich_format_columns(const int64_t row_count, const int64_t vec_offset, + sql::ObExpr &expr, sql::ObEvalCtx &eval_ctx); + int init_exprs_new_format_header( const common::ObIArray &cols_projector, const sql::ObExprPtrIArray &exprs, diff --git a/src/storage/tablelock/ob_table_lock_common.h b/src/storage/tablelock/ob_table_lock_common.h index ca20e404c9..d7f908e6d7 100644 --- a/src/storage/tablelock/ob_table_lock_common.h +++ b/src/storage/tablelock/ob_table_lock_common.h @@ -281,6 +281,7 @@ enum class ObLockOBJType : char OBJ_TYPE_DBMS_LOCK = 11, // for dbms lock OBJ_TYPE_MATERIALIZED_VIEW = 12, // for materialized view operations 
OBJ_TYPE_MYSQL_LOCK_FUNC = 13, // for mysql lock function + OBJ_TYPE_REFRESH_VECTOR_INDEX = 14, OBJ_TYPE_MAX }; @@ -343,6 +344,10 @@ int lock_obj_type_to_string(const ObLockOBJType obj_type, strncpy(str, "MYSQL_LOCK_FUNC", str_len); break; } + case ObLockOBJType::OBJ_TYPE_REFRESH_VECTOR_INDEX: { + strncpy(str, "REFRESH_VECTOR_INDEX", str_len); + break; + } default: { strncpy(str, "UNKNOWN", str_len); } diff --git a/src/storage/tablelock/ob_table_lock_rpc_struct.cpp b/src/storage/tablelock/ob_table_lock_rpc_struct.cpp index 8bbc219f3b..3d186f08cb 100644 --- a/src/storage/tablelock/ob_table_lock_rpc_struct.cpp +++ b/src/storage/tablelock/ob_table_lock_rpc_struct.cpp @@ -256,7 +256,8 @@ bool ObLockParam::is_valid() const || ObLockOBJType::OBJ_TYPE_OBJECT_NAME == lock_id_.obj_type_ || ObLockOBJType::OBJ_TYPE_DBMS_LOCK == lock_id_.obj_type_ || ObLockOBJType::OBJ_TYPE_MATERIALIZED_VIEW == lock_id_.obj_type_ - || ObLockOBJType::OBJ_TYPE_MYSQL_LOCK_FUNC == lock_id_.obj_type_))); + || ObLockOBJType::OBJ_TYPE_MYSQL_LOCK_FUNC == lock_id_.obj_type_ + || ObLockOBJType::OBJ_TYPE_REFRESH_VECTOR_INDEX == lock_id_.obj_type_))); } void ObLockRequest::reset() diff --git a/src/storage/vector_index/cmd/ob_vector_refresh_index_executor.cpp b/src/storage/vector_index/cmd/ob_vector_refresh_index_executor.cpp new file mode 100644 index 0000000000..b94662f683 --- /dev/null +++ b/src/storage/vector_index/cmd/ob_vector_refresh_index_executor.cpp @@ -0,0 +1,739 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ +// #define DBMS_VECTOR_MOCK_TEST +#define USING_LOG_PREFIX STORAGE + +#include "storage/vector_index/cmd/ob_vector_refresh_index_executor.h" +#include "lib/utility/ob_print_utils.h" +#include "sql/engine/ob_exec_context.h" +#include "storage/vector_index/ob_vector_index_refresh.h" +#include "pl/ob_pl.h" +#include "share/ob_vec_index_builder_util.h" + +namespace oceanbase { +namespace storage { + +ObVectorRefreshIndexExecutor::ObVectorRefreshIndexExecutor() + : ctx_(nullptr), session_info_(nullptr), tenant_id_(OB_INVALID_TENANT_ID) {} + +ObVectorRefreshIndexExecutor::~ObVectorRefreshIndexExecutor() {} + +int ObVectorRefreshIndexExecutor::execute_refresh( + pl::ObPLExecCtx &ctx, const ObVectorRefreshIndexArg &arg) { + int ret = OB_SUCCESS; + ctx_ = ctx.exec_ctx_; + pl_ctx_ = &ctx; + CK(OB_NOT_NULL(session_info_ = ctx_->get_my_session())); + CK(OB_NOT_NULL(ctx_->get_sql_ctx()->schema_guard_)); + OV(OB_LIKELY(arg.is_valid()), OB_INVALID_ARGUMENT, arg); + OZ(schema_checker_.init(*(ctx_->get_sql_ctx()->schema_guard_), + session_info_->get_sessid())); + OX(tenant_id_ = session_info_->get_effective_tenant_id()); + OZ(ObVectorRefreshIndexExecutor::check_min_data_version( + tenant_id_, DATA_VERSION_4_3_0_0, + "tenant's data version is below 4.3.0.0, refreshing vector index is not " + "supported.")); + OZ(resolve_refresh_arg(arg)); + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(do_refresh())) { + LOG_WARN("fail to do refresh", KR(ret)); + } + return ret; +} + +int ObVectorRefreshIndexExecutor::execute_rebuild( + pl::ObPLExecCtx &ctx, const ObVectorRebuildIndexArg &arg) { + int ret = OB_SUCCESS; + ctx_ = ctx.exec_ctx_; + pl_ctx_ = &ctx; + CK(OB_NOT_NULL(session_info_ = ctx_->get_my_session())); + CK(OB_NOT_NULL(ctx_->get_sql_ctx()->schema_guard_)); + OV(OB_LIKELY(arg.is_valid()), OB_INVALID_ARGUMENT, arg); + OZ(schema_checker_.init(*(ctx_->get_sql_ctx()->schema_guard_), + session_info_->get_sessid())); + OX(tenant_id_ = session_info_->get_effective_tenant_id()); + 
OZ(ObVectorRefreshIndexExecutor::check_min_data_version( + tenant_id_, DATA_VERSION_4_3_0_0, + "tenant's data version is below 4.3.0.0, refreshing vector index is not " + "supported.")); + OZ(resolve_rebuild_arg(arg)); + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(do_rebuild())) { + LOG_WARN("fail to do refresh", KR(ret)); + } + return ret; +} + +int ObVectorRefreshIndexExecutor::check_min_data_version( + const uint64_t tenant_id, const uint64_t min_data_version, + const char *errmsg) { + int ret = OB_SUCCESS; + uint64_t compat_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { + LOG_WARN("fail to get data version", KR(ret), K(tenant_id)); + } else if (OB_UNLIKELY(compat_version < min_data_version)) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("version lower than 4.3 does not support this operation", KR(ret), + K(tenant_id), K(compat_version), K(min_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, errmsg); + } + return ret; +} + +void ObVectorRefreshIndexExecutor::upper_db_table_name( + const ObNameCaseMode case_mode, const bool is_oracle_mode, ObString &name) { + if (is_oracle_mode) { + str_toupper(name.ptr(), name.length()); + } else { + if (OB_LOWERCASE_AND_INSENSITIVE == case_mode) { + str_tolower(name.ptr(), name.length()); + } + } +} + +int ObVectorRefreshIndexExecutor::resolve_table_name( + const ObCollationType cs_type, const ObNameCaseMode case_mode, + const bool is_oracle_mode, const ObString &name, ObString &database_name, + ObString &table_name) { + int ret = OB_SUCCESS; + static const char split_character = '.'; + database_name.reset(); + table_name.reset(); + if (OB_UNLIKELY(name.empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", KR(ret), K(name)); + } else { + ObString name_str = name; + const char *p = name_str.find(split_character); + if (p == nullptr) { + table_name = name_str; + } else { + database_name = name_str.split_on(p); + table_name = name_str; + if (OB_UNLIKELY(database_name.empty() || 
table_name.empty() || + nullptr != table_name.find(split_character))) { + ret = OB_WRONG_TABLE_NAME; + LOG_WARN("wrong table name", KR(ret), K(name)); + } + } + if (OB_SUCC(ret)) { + const bool preserve_lettercase = + is_oracle_mode ? true : (case_mode != OB_LOWERCASE_AND_INSENSITIVE); + upper_db_table_name(case_mode, is_oracle_mode, database_name); + upper_db_table_name(case_mode, is_oracle_mode, table_name); + if (!database_name.empty() && + OB_FAIL(ObSQLUtils::check_and_convert_db_name( + cs_type, preserve_lettercase, database_name))) { + LOG_WARN("fail to check and convert database name", KR(ret), + K(database_name)); + } else if (OB_FAIL(ObSQLUtils::check_and_convert_table_name( + cs_type, preserve_lettercase, table_name, + is_oracle_mode))) { + LOG_WARN("fail to check and convert table name", KR(ret), K(cs_type), + K(preserve_lettercase), K(table_name)); + } + } + } + return ret; +} + +int ObVectorRefreshIndexExecutor::generate_vector_aux_index_name( + VectorIndexAuxType index_type, const uint64_t data_table_id, + const ObString &index_name, ObString &real_index_name) { + int ret = OB_SUCCESS; + char *name_buf = nullptr; + ObIAllocator *allocator = pl_ctx_->allocator_; + if (OB_ISNULL(name_buf = static_cast( + allocator->alloc(OB_MAX_TABLE_NAME_LENGTH)))) { + ret = common::OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc mem", K(ret)); + } else { + int64_t pos = 0; + ObString suffix_index_name; + if (VectorIndexAuxType::DELTA_BUF_INDEX == index_type) { + if (OB_FAIL(databuff_printf(name_buf, OB_MAX_TABLE_NAME_LENGTH, pos, + "%.*s%s", + index_name.length(), index_name.ptr(), + ObVecIndexBuilderUtil::DELTA_BUFFER_TABLE_NAME_SUFFIX))) { + LOG_WARN("failed to print", K(ret)); + } + } else if (VectorIndexAuxType::INDEX_ID_INDEX == index_type) { + if (OB_FAIL(databuff_printf(name_buf, OB_MAX_TABLE_NAME_LENGTH, pos, + "%.*s%s", index_name.length(), + index_name.ptr(), + ObVecIndexBuilderUtil::INDEX_ID_TABLE_NAME_SUFFIX))) { + LOG_WARN("failed to print", K(ret)); 
+ } + } else if (VectorIndexAuxType::MOCK_INDEX_1 == index_type) { + if (OB_FAIL(databuff_printf(name_buf, OB_MAX_TABLE_NAME_LENGTH, pos, + "%.*s1", index_name.length(), + index_name.ptr()))) { + LOG_WARN("failed to print", K(ret)); + } + } else if (VectorIndexAuxType::MOCK_INDEX_2 == index_type) { + if (OB_FAIL(databuff_printf(name_buf, OB_MAX_TABLE_NAME_LENGTH, pos, + "%.*s2", index_name.length(), + index_name.ptr()))) { + LOG_WARN("failed to print", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (FALSE_IT(suffix_index_name.assign_ptr( + name_buf, static_cast(pos)))) { + } else if (OB_FAIL(ObTableSchema::build_index_table_name( + *allocator, data_table_id, suffix_index_name, + real_index_name))) { + LOG_WARN("fail to build index table name", K(ret), K(data_table_id), + K(suffix_index_name), K(real_index_name)); + } + } + return ret; +} + +int ObVectorRefreshIndexExecutor::mock_check_idx_col_name( + const ObString &idx_col_name, + const share::schema::ObTableSchema *&base_table_schema, + const share::schema::ObTableSchema *&delta_buf_table_schema, + const share::schema::ObTableSchema *&index_id_table_schema) { + int ret = OB_SUCCESS; + const ObIndexInfo &delta_table_idx_info = + delta_buf_table_schema->get_index_info(); + const ObIndexInfo &index_id_idx_info = + index_id_table_schema->get_index_info(); + uint64_t delta_table_idx_col_id = OB_INVALID_ID; + uint64_t index_id_idx_col_id = OB_INVALID_ID; + const ObColumnSchemaV2 *col_schema = nullptr; + if (OB_UNLIKELY(1 != delta_table_idx_info.get_size() || + 1 != index_id_idx_info.get_size())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("the count of index column is not 1", KR(ret), + K(delta_table_idx_info.get_size()), + K(index_id_idx_info.get_size())); + } else if (FALSE_IT(delta_table_idx_col_id = + delta_table_idx_info.get_column(0)->column_id_)) { + } else if (FALSE_IT(index_id_idx_col_id = + index_id_idx_info.get_column(0)->column_id_)) { + } else if (OB_UNLIKELY(delta_table_idx_col_id != 
index_id_idx_col_id)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("delta_buf_table & index_id_idx have different index column id", + KR(ret), K(delta_table_idx_col_id), K(index_id_idx_col_id)); + } else if (FALSE_IT(col_schema = base_table_schema->get_column_schema( + delta_table_idx_col_id))) { + } else if (OB_ISNULL(col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column not exist", KR(ret), K(delta_table_idx_col_id)); + } else if (OB_UNLIKELY(idx_col_name != col_schema->get_column_name_str())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("idx column name does not match with index table", KR(ret), + K(idx_col_name), K(col_schema->get_column_name_str())); + } + return ret; +} + +int ObVectorRefreshIndexExecutor::check_idx_col_name( + const ObString &idx_col_name, + const share::schema::ObTableSchema *&base_table_schema, + const share::schema::ObTableSchema *&delta_buf_table_schema, + const share::schema::ObTableSchema *&index_id_table_schema) { + int ret = OB_SUCCESS; + const ObIndexInfo &delta_table_idx_info = + delta_buf_table_schema->get_index_info(); + const ObIndexInfo &index_id_idx_info = + index_id_table_schema->get_index_info(); + uint64_t delta_table_idx_col_id = OB_INVALID_ID; + uint64_t index_id_idx_col_id = OB_INVALID_ID; + const ObColumnSchemaV2 *col_schema = nullptr; + if (OB_UNLIKELY(1 != delta_table_idx_info.get_size() || + 1 != index_id_idx_info.get_size())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("the count of index column is not 1", KR(ret), + K(delta_table_idx_info.get_size()), + K(index_id_idx_info.get_size())); + } else if (FALSE_IT(delta_table_idx_col_id = + delta_table_idx_info.get_column(0)->column_id_)) { + } else if (FALSE_IT(index_id_idx_col_id = + index_id_idx_info.get_column(0)->column_id_)) { + } else if (OB_UNLIKELY(delta_table_idx_col_id != index_id_idx_col_id)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("delta_buf_table & index_id_idx have different index column id", + KR(ret), K(delta_table_idx_col_id), K(index_id_idx_col_id)); + } 
else if (FALSE_IT(col_schema = base_table_schema->get_column_schema( + delta_table_idx_col_id))) { + } else if (OB_ISNULL(col_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column not exist", KR(ret), K(delta_table_idx_col_id)); + } else if (OB_UNLIKELY(idx_col_name != col_schema->get_column_name_str())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("idx column name does not match with index table", KR(ret), + K(idx_col_name), K(col_schema->get_column_name_str())); + } + return ret; +} + +int ObVectorRefreshIndexExecutor::mock_resolve_and_check_table_valid( + const ObString &arg_idx_name, const ObString &arg_base_name, + const ObString &idx_col_name, + const share::schema::ObTableSchema *&base_table_schema, + const share::schema::ObTableSchema *&delta_buf_table_schema, + const share::schema::ObTableSchema *&index_id_table_schema) { + int ret = OB_SUCCESS; + ObNameCaseMode case_mode = OB_NAME_CASE_INVALID; + ObCollationType cs_type = CS_TYPE_INVALID; + if (OB_FAIL(session_info_->get_name_case_mode(case_mode))) { + LOG_WARN("fail to get name case mode", KR(ret)); + } else if (OB_FAIL(session_info_->get_collation_connection(cs_type))) { + LOG_WARN("fail to get collation_connection", KR(ret)); + } else { + ObString base_db_name, base_name, index_db_name, index_name; + ObString new_base_db_name, new_base_name; + ObString delta_buf_table_name, index_id_table_name; + bool has_synonym = false; + base_table_schema = delta_buf_table_schema = index_id_table_schema = nullptr; + uint64_t base_table_id = -1; + if (OB_FAIL(ObVectorRefreshIndexExecutor::resolve_table_name( + cs_type, case_mode, lib::is_oracle_mode(), arg_base_name, + base_db_name, base_name))) { + LOG_WARN("fail to resolve table name", KR(ret), K(cs_type), K(case_mode), + K(arg_base_name)); + LOG_USER_ERROR(OB_WRONG_TABLE_NAME, + static_cast(arg_base_name.length()), + arg_base_name.ptr()); + } else if (OB_FAIL(ObVectorRefreshIndexExecutor::resolve_table_name( + cs_type, case_mode, lib::is_oracle_mode(), 
arg_idx_name, + index_db_name, index_name))) { + LOG_WARN("fail to resolve table name", KR(ret), K(cs_type), K(case_mode), + K(arg_idx_name)); + LOG_USER_ERROR(OB_WRONG_TABLE_NAME, static_cast(arg_idx_name.length()), + arg_idx_name.ptr()); + } else if (base_db_name.empty() && + FALSE_IT(base_db_name = session_info_->get_database_name())) { + } else if (index_db_name.empty() && + FALSE_IT(index_db_name = session_info_->get_database_name())) { + } else if (OB_UNLIKELY(base_db_name.empty() || index_db_name.empty())) { + ret = OB_ERR_NO_DB_SELECTED; + LOG_WARN("No database selected", KR(ret)); + } else if (OB_UNLIKELY(base_db_name != index_db_name)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("different db name is not supported."); + } else if (OB_FAIL(schema_checker_.get_table_schema_with_synonym( + tenant_id_, base_db_name, base_name, false /*is_index_table*/, + has_synonym, new_base_db_name, new_base_name, + base_table_schema))) { + LOG_WARN("fail to get table schema with synonym", KR(ret), K(base_db_name), + K(base_name)); + } else if (OB_ISNULL(base_table_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("base table not exist", KR(ret), K(base_db_name), K(base_name), + KPC(base_table_schema)); + } else if (FALSE_IT(base_table_id = base_table_schema->get_table_id())) { + } else if (OB_FAIL(generate_vector_aux_index_name( + VectorIndexAuxType::MOCK_INDEX_1, base_table_id, index_name, + delta_buf_table_name))) { + LOG_WARN("fail to generate delta buf index table name", KR(ret), + K(base_table_id), K(index_name)); + } else if (OB_FAIL(generate_vector_aux_index_name( + VectorIndexAuxType::MOCK_INDEX_2, base_table_id, index_name, + index_id_table_name))) { + LOG_WARN("fail to generate index id index table name", KR(ret), + K(base_table_id), K(index_name)); + } else if (OB_FAIL(schema_checker_.get_table_schema( + tenant_id_, index_db_name, delta_buf_table_name, true, + delta_buf_table_schema))) { + LOG_WARN("fail to get table schema", KR(ret), K(index_db_name), + 
K(delta_buf_table_name)); + } else if (OB_FAIL(schema_checker_.get_table_schema(tenant_id_, index_db_name, + index_id_table_name, true, + index_id_table_schema, + false, /*with_hidden_flag*/ + true /*is_built_in_index*/))) { + LOG_WARN("fail to get table schema", KR(ret), K(index_db_name), + K(index_id_table_name)); + } else if (OB_ISNULL(delta_buf_table_schema) || + OB_ISNULL(index_id_table_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("delta_buf_table or index_id_table is not exist", KR(ret), + K(delta_buf_table_schema), K(index_id_table_schema)); + } else if (!idx_col_name.empty() && + OB_FAIL(mock_check_idx_col_name(idx_col_name, base_table_schema, + delta_buf_table_schema, + index_id_table_schema))) { + LOG_WARN("fail to check idx column name", KR(ret), K(idx_col_name)); + } + } + return ret; +} + +int ObVectorRefreshIndexExecutor::resolve_and_check_table_valid( + const ObString &arg_idx_name, const ObString &arg_base_name, + const ObString &idx_col_name, + const share::schema::ObTableSchema *&base_table_schema, + const share::schema::ObTableSchema *&delta_buf_table_schema, + const share::schema::ObTableSchema *&index_id_table_schema) { + int ret = OB_SUCCESS; + ObNameCaseMode case_mode = OB_NAME_CASE_INVALID; + ObCollationType cs_type = CS_TYPE_INVALID; + if (OB_FAIL(session_info_->get_name_case_mode(case_mode))) { + LOG_WARN("fail to get name case mode", KR(ret)); + } else if (OB_FAIL(session_info_->get_collation_connection(cs_type))) { + LOG_WARN("fail to get collation_connection", KR(ret)); + } else { + ObString base_db_name, base_name, index_db_name, index_name; + ObString new_base_db_name, new_base_name; + ObString delta_buf_table_name, index_id_table_name; + bool has_synonym = false; + base_table_schema = delta_buf_table_schema = index_id_table_schema = nullptr; + uint64_t base_table_id = -1; + ObString base_vector_index_col_name; + if (OB_FAIL(ObVectorRefreshIndexExecutor::resolve_table_name( + cs_type, case_mode, lib::is_oracle_mode(), 
arg_base_name, + base_db_name, base_name))) { + LOG_WARN("fail to resolve table name", KR(ret), K(cs_type), K(case_mode), + K(arg_base_name)); + LOG_USER_ERROR(OB_WRONG_TABLE_NAME, + static_cast(arg_base_name.length()), + arg_base_name.ptr()); + } else if (OB_FAIL(ObVectorRefreshIndexExecutor::resolve_table_name( + cs_type, case_mode, lib::is_oracle_mode(), arg_idx_name, + index_db_name, index_name))) { + LOG_WARN("fail to resolve table name", KR(ret), K(cs_type), K(case_mode), + K(arg_idx_name)); + LOG_USER_ERROR(OB_WRONG_TABLE_NAME, static_cast(arg_idx_name.length()), + arg_idx_name.ptr()); + } else if (base_db_name.empty() && + FALSE_IT(base_db_name = session_info_->get_database_name())) { + } else if (index_db_name.empty() && + FALSE_IT(index_db_name = session_info_->get_database_name())) { + } else if (OB_UNLIKELY(base_db_name.empty() || index_db_name.empty())) { + ret = OB_ERR_NO_DB_SELECTED; + LOG_WARN("No database selected", KR(ret)); + } else if (OB_UNLIKELY(base_db_name != index_db_name)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("different db name is not supported."); + } else if (OB_FAIL(schema_checker_.get_table_schema_with_synonym( + tenant_id_, base_db_name, base_name, false /*is_index_table*/, + has_synonym, new_base_db_name, new_base_name, + base_table_schema))) { + LOG_WARN("fail to get table schema with synonym", KR(ret), K(base_db_name), + K(base_name)); + } else if (OB_ISNULL(base_table_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("base table not exist", KR(ret), K(base_db_name), K(base_name), + KPC(base_table_schema)); + } else if (FALSE_IT(base_table_id = base_table_schema->get_table_id())) { + } else if (OB_FAIL(generate_vector_aux_index_name( + VectorIndexAuxType::DELTA_BUF_INDEX, base_table_id, index_name, + delta_buf_table_name))) { + LOG_WARN("fail to generate delta buf index table name", KR(ret), + K(base_table_id), K(index_name)); + } else if (OB_FAIL(generate_vector_aux_index_name( + VectorIndexAuxType::INDEX_ID_INDEX, 
base_table_id, index_name, + index_id_table_name))) { + LOG_WARN("fail to generate index id index table name", KR(ret), + K(base_table_id), K(index_name)); + } else if (OB_FAIL(schema_checker_.get_table_schema( + tenant_id_, index_db_name, delta_buf_table_name, true, + delta_buf_table_schema))) { + LOG_WARN("fail to get table schema", KR(ret), K(index_db_name), + K(delta_buf_table_name)); + } else if (OB_FAIL(schema_checker_.get_table_schema(tenant_id_, index_db_name, + index_id_table_name, true, + index_id_table_schema, + false, /*with_hidden_flag*/ + true /*is_built_in_index*/))) { + LOG_WARN("fail to get table schema", KR(ret), K(index_db_name), + K(index_id_table_name)); + } else if (OB_ISNULL(delta_buf_table_schema) || + OB_ISNULL(index_id_table_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("delta_buf_table or index_id_table is not exist", KR(ret), + K(delta_buf_table_schema), K(index_id_table_schema)); + } else if (!idx_col_name.empty() && + OB_FAIL(get_vector_index_column_name( + base_table_schema, delta_buf_table_schema, + base_vector_index_col_name))) { + LOG_WARN("fail to get vector index column name", KR(ret)); + } else if (!idx_col_name.empty() && + 0 != idx_col_name.case_compare(base_vector_index_col_name)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("vector index column name is not match", KR(ret), K(idx_col_name), + K(base_vector_index_col_name)); + } + } + return ret; +} + +int ObVectorRefreshIndexExecutor::to_refresh_method( + const ObString &arg_refresh_method, + share::schema::ObVectorRefreshMethod &method, bool is_rebuild) { + int ret = OB_SUCCESS; + if (is_rebuild) { + method = schema::ObVectorRefreshMethod::REBUILD_COMPLETE; + } else if (arg_refresh_method.empty() || + 0 == arg_refresh_method.case_compare("FAST")) { + method = schema::ObVectorRefreshMethod::REFRESH_DELTA; + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("Vector index refresh method is not supported.", KR(ret), + K(arg_refresh_method)); + } + return ret; +} + +int 
ObVectorRefreshIndexExecutor::to_vector_index_organization( + const ObString &idx_organization_str, + share::schema::ObVectorIndexOrganization &idx_organization) { + int ret = OB_SUCCESS; + if (idx_organization_str.empty() || + 0 == idx_organization_str.case_compare("IN MEMORY NEIGHBOR GRAPH")) { + idx_organization = ObVectorIndexOrganization::IN_MEMORY_NEIGHBOR_GRAPH; + } else if (0 == idx_organization_str.case_compare("NEIGHBOR PARTITION")) { + idx_organization = ObVectorIndexOrganization::NEIGHBOR_PARTITION; + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("Vector index organization is not supported.", KR(ret), + K(idx_organization_str)); + } + return ret; +} + +int ObVectorRefreshIndexExecutor::to_vector_index_distance_metric( + const ObString &idx_distance_metric_str, + share::schema::ObVetcorIndexDistanceMetric &idx_distance_metric) { + int ret = OB_SUCCESS; + if (idx_distance_metric_str.empty() || + 0 == idx_distance_metric_str.case_compare("EUCLIDEAN")) { + idx_distance_metric = ObVetcorIndexDistanceMetric::EUCLIDEAN; + } else if (0 == idx_distance_metric_str.case_compare("EUCLIDEAN_SQUARED")) { + idx_distance_metric = ObVetcorIndexDistanceMetric::EUCLIDEAN_SQUARED; + } else if (0 == idx_distance_metric_str.case_compare("DOT")) { + idx_distance_metric = ObVetcorIndexDistanceMetric::DOT; + } else if (0 == idx_distance_metric_str.case_compare("COSINE")) { + idx_distance_metric = ObVetcorIndexDistanceMetric::COSINE; + } else if (0 == idx_distance_metric_str.case_compare("MANHATTAN")) { + idx_distance_metric = ObVetcorIndexDistanceMetric::MANHATTAN; + } else if (0 == idx_distance_metric_str.case_compare("HAMMING")) { + idx_distance_metric = ObVetcorIndexDistanceMetric::HAMMING; + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("Vector index distance metrics is not supported.", KR(ret), + K(idx_distance_metric_str)); + } + return ret; +} + +int ObVectorRefreshIndexExecutor::get_vector_index_column_name( + const share::schema::ObTableSchema *base_table_schema, + 
const share::schema::ObTableSchema *delta_buf_table_schema, + ObString &col_name) { + int ret = OB_SUCCESS; + col_name.reset(); + if (OB_UNLIKELY(!delta_buf_table_schema->is_vec_delta_buffer_type())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table is not a delta_buf_table", KR(ret)); + } else { + ObSEArray col_names; + if (OB_FAIL(ObVectorIndexUtil::get_vector_index_column_name(*base_table_schema, + *delta_buf_table_schema, + col_names))) { + LOG_WARN("fail to get vector index col name", K(ret)); + } else if (col_names.count() < 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get col name array wrong", K(col_names)); + } else { + col_name = col_names.at(0); + } + } + return ret; +} + +int ObVectorRefreshIndexExecutor::is_refresh_retry_ret_code(int ret_code) { + return OB_OLD_SCHEMA_VERSION == ret_code || OB_EAGAIN == ret_code || + OB_INVALID_QUERY_TIMESTAMP == ret_code || + OB_TASK_EXPIRED == ret_code || is_master_changed_error(ret_code) || + is_partition_change_error(ret_code) || + is_ddl_stmt_packet_retry_err(ret_code); +} + +int ObVectorRefreshIndexExecutor::resolve_refresh_arg( + const ObVectorRefreshIndexArg &arg) { + int ret = OB_SUCCESS; + const share::schema::ObTableSchema *base_table_schema = nullptr; + const share::schema::ObTableSchema *delta_buf_table_schema = nullptr; + const share::schema::ObTableSchema *index_id_table_schema = nullptr; + +#ifdef DBMS_VECTOR_MOCK_TEST + if (OB_FAIL(mock_resolve_and_check_table_valid( + arg.idx_name_, arg.table_name_, arg.idx_vector_col_, + base_table_schema, delta_buf_table_schema, index_id_table_schema))) +#else + if (OB_FAIL(resolve_and_check_table_valid( + arg.idx_name_, arg.table_name_, arg.idx_vector_col_, + base_table_schema, delta_buf_table_schema, index_id_table_schema))) +#endif + { + LOG_WARN("fail to resolve and check table valid", KR(ret), K(arg)); + } else { + base_tb_id_ = base_table_schema->get_table_id(); + delta_buf_tb_id_ = delta_buf_table_schema->get_table_id(); + index_id_tb_id_ = 
index_id_table_schema->get_table_id(); + refresh_threshold_ = arg.refresh_threshold_; + } + // resolve method + if (OB_SUCC(ret)) { + refresh_method_ = schema::ObVectorRefreshMethod::MAX; + if (OB_FAIL(ObVectorRefreshIndexExecutor::to_refresh_method( + arg.refresh_type_, refresh_method_))) { + LOG_WARN("fail to resolve refresh method", KR(ret)); + } + } + return ret; +} + +int ObVectorRefreshIndexExecutor::resolve_rebuild_arg( + const ObVectorRebuildIndexArg &arg) { + int ret = OB_SUCCESS; + const share::schema::ObTableSchema *base_table_schema = nullptr; + const share::schema::ObTableSchema *delta_buf_table_schema = nullptr; + const share::schema::ObTableSchema *index_id_table_schema = nullptr; + +#ifdef DBMS_VECTOR_MOCK_TEST + if (OB_FAIL(mock_resolve_and_check_table_valid( + arg.idx_name_, arg.table_name_, arg.idx_vector_col_, + base_table_schema, delta_buf_table_schema, index_id_table_schema))) +#else + if (OB_FAIL(resolve_and_check_table_valid( + arg.idx_name_, arg.table_name_, arg.idx_vector_col_, + base_table_schema, delta_buf_table_schema, index_id_table_schema))) +#endif + { + LOG_WARN("fail to resolve and check table valid", KR(ret), K(arg)); + } else { + base_tb_id_ = base_table_schema->get_table_id(); + delta_buf_tb_id_ = delta_buf_table_schema->get_table_id(); + index_id_tb_id_ = index_id_table_schema->get_table_id(); + refresh_method_ = schema::ObVectorRefreshMethod::REBUILD_COMPLETE; + // TODO:(@wangmiao) resolve vector index and check if it is the same as the + // origin parameter. + idx_parameters_ = arg.idx_parameters_; + // TODO:(@wangmiao) idx_parallel_creation is not effective now. + idx_parallel_creation_ = arg.idx_parallel_creation_; + delta_rate_threshold_ = arg.delta_rate_threshold_; + } + // resolve idx_organization + if (OB_SUCC(ret)) { + // TODO:(@wangmiao) check if it is the same as origin idx_organization. 
+ if (OB_FAIL(ObVectorRefreshIndexExecutor::to_vector_index_organization( + arg.idx_organization_, idx_organization_))) { + LOG_WARN("fail to vector index organization", KR(ret)); + } + } + // resolve idx_distance_metric + if (OB_SUCC(ret)) { + // TODO:(@wangmiao) check if it is the same as origin idx_distance_metrics. + if (OB_FAIL(ObVectorRefreshIndexExecutor::to_vector_index_distance_metric( + arg.idx_distance_metrics_, idx_distance_metrics_))) { + LOG_WARN("fail to vector index distance metric", KR(ret)); + } + } + return ret; +} + +int ObVectorRefreshIndexExecutor::do_refresh() { + int ret = OB_SUCCESS; + ObVectorRefreshIndexCtx refresh_ctx; + refresh_ctx.tenant_id_ = tenant_id_; + refresh_ctx.base_tb_id_ = base_tb_id_; + refresh_ctx.delta_buf_tb_id_ = delta_buf_tb_id_; + refresh_ctx.index_id_tb_id_ = index_id_tb_id_; + refresh_ctx.refresh_method_ = refresh_method_; + refresh_ctx.refresh_threshold_ = refresh_threshold_; + + while (OB_SUCC(ret) && OB_SUCC(ctx_->check_status())) { + ObVectorRefreshIdxTransaction trans; + ObVectorIndexRefresher refresher; + if (OB_FAIL(trans.start(ctx_->get_my_session(), ctx_->get_sql_proxy()))) { + LOG_WARN("fail to start trans", KR(ret)); + } else if (FALSE_IT(refresh_ctx.trans_ = &trans)) { + } else if (OB_FAIL(refresher.init(*ctx_, refresh_ctx))) { + LOG_WARN("fail to init refresher", KR(ret), K(refresh_ctx)); + } else if (OB_FAIL(refresher.refresh())) { + LOG_WARN("fail to do refresh", KR(ret), K(refresh_ctx)); + } + if (trans.is_started()) { + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = trans.end(OB_SUCC(ret)))) { + LOG_WARN("failed to commit trans", KR(ret), KR(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + } + if (OB_FAIL(ret)) { + if (ObVectorRefreshIndexExecutor::is_refresh_retry_ret_code(ret)) { + ret = OB_SUCCESS; + refresh_ctx.reuse(); + ob_usleep(1LL * 1000 * 1000); + } + } else { + break; + } + } + return ret; +} + +int ObVectorRefreshIndexExecutor::do_rebuild() { + int ret = OB_SUCCESS; + 
ObVectorRefreshIndexCtx refresh_ctx; + refresh_ctx.tenant_id_ = tenant_id_; + refresh_ctx.base_tb_id_ = base_tb_id_; + refresh_ctx.delta_buf_tb_id_ = delta_buf_tb_id_; + refresh_ctx.index_id_tb_id_ = index_id_tb_id_; + refresh_ctx.refresh_method_ = refresh_method_; + refresh_ctx.idx_organization_ = idx_organization_; + refresh_ctx.idx_distance_metric_ = idx_distance_metrics_; + refresh_ctx.idx_parameters_ = idx_parameters_; + refresh_ctx.idx_parallel_creation_ = idx_parallel_creation_; + refresh_ctx.delta_rate_threshold_ = delta_rate_threshold_; + + while (OB_SUCC(ret) && OB_SUCC(ctx_->check_status())) { + ObVectorRefreshIdxTransaction trans; + ObVectorIndexRefresher refresher; + if (OB_FAIL(trans.start(ctx_->get_my_session(), ctx_->get_sql_proxy()))) { + LOG_WARN("fail to start trans", KR(ret)); + } else if (FALSE_IT(refresh_ctx.trans_ = &trans)) { + } else if (OB_FAIL(refresher.init(*ctx_, refresh_ctx))) { + LOG_WARN("fail to init refresher", KR(ret), K(refresh_ctx)); + } else if (OB_FAIL(refresher.refresh())) { + LOG_WARN("fail to do refresh", KR(ret), K(refresh_ctx)); + } + if (trans.is_started()) { + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = trans.end(OB_SUCC(ret)))) { + LOG_WARN("failed to commit trans", KR(ret), KR(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + } + if (OB_FAIL(ret)) { + if (ObVectorRefreshIndexExecutor::is_refresh_retry_ret_code(ret)) { + ret = OB_SUCCESS; + refresh_ctx.reuse(); + ob_usleep(1LL * 1000 * 1000); + } + } else { + break; + } + } + return ret; +} + +} // namespace storage +} // namespace oceanbase \ No newline at end of file diff --git a/src/storage/vector_index/cmd/ob_vector_refresh_index_executor.h b/src/storage/vector_index/cmd/ob_vector_refresh_index_executor.h new file mode 100644 index 0000000000..425555d713 --- /dev/null +++ b/src/storage/vector_index/cmd/ob_vector_refresh_index_executor.h @@ -0,0 +1,168 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#pragma once + +#include "share/schema/ob_schema_struct.h" +#include "sql/resolver/ob_schema_checker.h" + +namespace oceanbase { +namespace pl { +struct ObPLExecCtx; +} +namespace sql +{ +class ObExecContext; +} + +namespace storage { + +// ObVectorRefreshIndexArg for DBMS_VECTOR.REFRESH_INDEX +struct ObVectorRefreshIndexArg { +public: + static const int64_t DEFAULT_REFRESH_THRESHOLD = 10000; + ObVectorRefreshIndexArg() : refresh_threshold_(DEFAULT_REFRESH_THRESHOLD) {} + bool is_valid() const { return !idx_name_.empty() && !table_name_.empty(); } + TO_STRING_KV(K_(idx_name), K_(table_name), K_(idx_vector_col), + K_(refresh_type), K_(refresh_threshold)); + +public: + ObString idx_name_; + ObString table_name_; + ObString idx_vector_col_; + ObString refresh_type_; // COMPLETE / FAST. + // If delta_buf_table's row count is greater than refresh_threshold_, refresh + // is triggered. 
+ int64_t refresh_threshold_; +}; + +// ObVectorRebuildIndexArg for DBMS_VECTOR.REBUILD_INDEX +struct ObVectorRebuildIndexArg { +public: + static constexpr double DEFAULT_REBUILD_THRESHOLD = 0.2; + ObVectorRebuildIndexArg() + : delta_rate_threshold_(DEFAULT_REBUILD_THRESHOLD), + idx_parallel_creation_(1) {} + bool is_valid() const { return !idx_name_.empty() && !table_name_.empty(); } + TO_STRING_KV(K_(idx_name), K_(table_name), K_(idx_vector_col), + K_(delta_rate_threshold), K_(idx_organization), + K_(idx_distance_metrics), K_(idx_parameters), + K_(idx_parallel_creation)); + +public: + ObString idx_name_; + ObString table_name_; + ObString idx_vector_col_; + // If (delta_buf_table's row count + index_id_table's row count) / + // data_table's row count is greater than delta_rate_threshold_, rebuild is + // triggered. + double delta_rate_threshold_; + ObString idx_organization_; // DEFAULT: IN MEMORY NEIGHBOR GRAPH + ObString idx_distance_metrics_; // DEFAULT: EUCLIDEAN + ObString idx_parameters_; // parameters for different vector-index algorithm + int64_t idx_parallel_creation_; // DEFAULT: 1 +}; + +class ObVectorRefreshIndexExecutor { +public: + enum class VectorIndexAuxType : int8_t { + DELTA_BUF_INDEX = 0, + INDEX_ID_INDEX = 1, + MOCK_INDEX_1 = 2, + MOCK_INDEX_2 = 3, + }; + + ObVectorRefreshIndexExecutor(); + ~ObVectorRefreshIndexExecutor(); + DISABLE_COPY_ASSIGN(ObVectorRefreshIndexExecutor); + int execute_refresh(pl::ObPLExecCtx &ctx, + const ObVectorRefreshIndexArg &arg); + int execute_rebuild(pl::ObPLExecCtx &ctx, + const ObVectorRebuildIndexArg &arg); + +private: + static int check_min_data_version(const uint64_t tenant_id, + const uint64_t min_data_version, + const char *errmsg); + static int resolve_table_name(const ObCollationType cs_type, + const ObNameCaseMode case_mode, + const bool is_oracle_mode, const ObString &name, + ObString &database_name, ObString &table_name); + static void upper_db_table_name(const ObNameCaseMode case_mode, + const bool 
is_oracle_mode, ObString &name); + static int to_refresh_method(const ObString &arg_refresh_method, + share::schema::ObVectorRefreshMethod &method, + bool is_rebuild = false); + static int to_vector_index_organization( + const ObString &idx_organization_str, + share::schema::ObVectorIndexOrganization &idx_organization); + static int to_vector_index_distance_metric( + const ObString &idx_distance_metric_str, + share::schema::ObVetcorIndexDistanceMetric &idx_distance_metric); + static int is_refresh_retry_ret_code(int ret_code); + static int get_vector_index_column_name( + const share::schema::ObTableSchema *base_table_schema, + const share::schema::ObTableSchema *index_id_schema, ObString &col_name); + int generate_vector_aux_index_name(VectorIndexAuxType index_type, + const uint64_t data_table_id, + const ObString &index_name, + ObString &real_index_name); + int mock_check_idx_col_name( + const ObString &idx_col_name, + const share::schema::ObTableSchema *&base_table_schema, + const share::schema::ObTableSchema *&delta_buf_table_schema, + const share::schema::ObTableSchema *&index_id_table_schema); + int check_idx_col_name( + const ObString &idx_col_name, + const share::schema::ObTableSchema *&base_table_schema, + const share::schema::ObTableSchema *&delta_buf_table_schema, + const share::schema::ObTableSchema *&index_id_table_schema); + // Only for mock testing. 
+ int mock_resolve_and_check_table_valid( + const ObString &arg_idx_name, const ObString &arg_base_name, + const ObString &idx_col_name, + const share::schema::ObTableSchema *&base_table_schema, + const share::schema::ObTableSchema *&delta_buf_table_schema, + const share::schema::ObTableSchema *&index_id_table_schema); + int resolve_and_check_table_valid( + const ObString &arg_idx_name, const ObString &arg_base_name, + const ObString &idx_col_name, + const share::schema::ObTableSchema *&base_table_schema, + const share::schema::ObTableSchema *&delta_buf_table_schema, + const share::schema::ObTableSchema *&index_id_table_schema); + int resolve_refresh_arg(const ObVectorRefreshIndexArg &arg); + int resolve_rebuild_arg(const ObVectorRebuildIndexArg &arg); + int do_refresh(); + int do_rebuild(); + +private: + pl::ObPLExecCtx *pl_ctx_; + sql::ObExecContext *ctx_; + sql::ObSQLSessionInfo *session_info_; + sql::ObSchemaChecker schema_checker_; + + uint64_t tenant_id_; + uint64_t base_tb_id_; + uint64_t delta_buf_tb_id_; + uint64_t index_id_tb_id_; + share::schema::ObVectorRefreshMethod refresh_method_; + share::schema::ObVectorIndexOrganization idx_organization_; + share::schema::ObVetcorIndexDistanceMetric idx_distance_metrics_; + ObString idx_parameters_; + int64_t idx_parallel_creation_; + + double delta_rate_threshold_; // for rebuild index + int64_t refresh_threshold_; // for refresh index +}; + +} // namespace storage +} // namespace oceanbase \ No newline at end of file diff --git a/src/storage/vector_index/ob_vector_index_refresh.cpp b/src/storage/vector_index/ob_vector_index_refresh.cpp new file mode 100644 index 0000000000..156661f3fc --- /dev/null +++ b/src/storage/vector_index/ob_vector_index_refresh.cpp @@ -0,0 +1,589 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +// #define DBMS_VECTOR_MOCK_TEST +#define USING_LOG_PREFIX STORAGE + +#include "storage/vector_index/ob_vector_index_refresh.h" +#include "share/ob_errno.h" +#include "share/schema/ob_schema_getter_guard.h" +#include "sql/engine/ob_exec_context.h" +#include "storage/tablelock/ob_lock_inner_connection_util.h" +#include "sql/engine/cmd/ob_ddl_executor_util.h" + +namespace oceanbase { +namespace storage { +using namespace common; +using namespace observer; +using namespace share; +using namespace share::schema; +using namespace sql; + +ObVectorIndexRefresher::ObVectorIndexRefresher() + : ctx_(nullptr), refresh_ctx_(nullptr), is_inited_(false) {} + +ObVectorIndexRefresher::~ObVectorIndexRefresher() {} + +int ObVectorIndexRefresher::init(sql::ObExecContext &ctx, + ObVectorRefreshIndexCtx &refresh_ctx) { + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("ObVectorIndexRefresher init twice", KR(ret), KP(this)); + } else if (OB_UNLIKELY(nullptr == ctx.get_my_session() || + nullptr == ctx.get_sql_proxy() || + nullptr == refresh_ctx.trans_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", KR(ret), K(ctx), K(refresh_ctx)); + } else { + ctx_ = &ctx; + refresh_ctx_ = &refresh_ctx; + is_inited_ = true; + } + return ret; +} + +int ObVectorIndexRefresher::refresh() { + int ret = OB_SUCCESS; + const ObVectorRefreshMethod refresh_type = refresh_ctx_->refresh_method_; + if (ObVectorRefreshMethod::REBUILD_COMPLETE == refresh_type) { + if (OB_FAIL(do_rebuild())) { + LOG_WARN("fail to rebuild index", KR(ret)); + } + } else if (ObVectorRefreshMethod::REFRESH_DELTA == refresh_type) { + if 
(OB_FAIL(do_refresh())) { + LOG_WARN("fail to refresh index", KR(ret)); + } + } else { + ret = OB_NOT_SUPPORTED; + LOG_WARN("refresh type is not supported", KR(ret), K(refresh_type)); + } + return ret; +} + +int ObVectorIndexRefresher::get_current_scn(share::SCN ¤t_scn) { + int ret = OB_SUCCESS; + const int64_t DEFAULT_TIMEOUT = GCONF.internal_sql_execute_timeout; + transaction::ObTransService *txs = MTL(transaction::ObTransService *); + if (OB_ISNULL(txs)) { + ret = OB_ERR_SYS; + LOG_WARN("trans service is null", KR(ret)); + } else { + ObTimeoutCtx timeout_ctx; + if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(timeout_ctx, + DEFAULT_TIMEOUT))) { + LOG_WARN("fail to set default timeout ctx", KR(ret)); + } else if (OB_FAIL(txs->get_read_snapshot_version( + timeout_ctx.get_abs_timeout(), current_scn))) { + LOG_WARN("get read snapshot version", KR(ret)); + } + } + return ret; +} + +int ObVectorIndexRefresher::lock_delta_buf_tb( + ObVectorRefreshIdxTransaction &trans, const uint64_t tenant_id, + const uint64_t delta_buf_tb_id, const bool try_lock) { + int ret = OB_SUCCESS; + ObTableLockOwnerID owner_id; + if (OB_UNLIKELY(!trans.is_started() || OB_INVALID_TENANT_ID == tenant_id || + OB_INVALID_ID == delta_buf_tb_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", KR(ret), K(trans.is_started()), K(tenant_id), + K(delta_buf_tb_id)); + } else if (OB_FAIL(owner_id.convert_from_value( + ObLockOwnerType::DEFAULT_OWNER_TYPE, get_tid_cache()))) { + LOG_WARN("failed to get owner id", K(ret), K(get_tid_cache())); + } else { + const int64_t DEFAULT_TIMEOUT = GCONF.internal_sql_execute_timeout; + ObInnerSQLConnection *conn = nullptr; + ObLockObjRequest lock_arg; + lock_arg.obj_type_ = ObLockOBJType::OBJ_TYPE_REFRESH_VECTOR_INDEX; + lock_arg.obj_id_ = delta_buf_tb_id; + lock_arg.owner_id_ = owner_id; + lock_arg.lock_mode_ = EXCLUSIVE; + lock_arg.op_type_ = ObTableLockOpType::IN_TRANS_COMMON_LOCK; + if (OB_ISNULL(conn = static_cast( + trans.get_connection()))) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("conn_ is NULL", KR(ret)); + } else if (try_lock) { + lock_arg.timeout_us_ = 0; + } else { + ObTimeoutCtx timeout_ctx; + if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(timeout_ctx, + DEFAULT_TIMEOUT))) { + LOG_WARN("fail to set default timeout ctx", KR(ret)); + } else { + lock_arg.timeout_us_ = timeout_ctx.get_timeout(); + } + } + if (OB_SUCC(ret)) { + LOG_DEBUG("lock obj start", K(lock_arg)); + if (OB_FAIL( + ObInnerConnectionLockUtil::lock_obj(tenant_id, lock_arg, conn))) { + LOG_WARN("fail to lock obj", KR(ret)); + } + LOG_DEBUG("lock obj end", KR(ret)); + } + } + return ret; +} + +int ObVectorIndexRefresher::get_table_row_count(const ObString &db_name, + const ObString &table_name, + const share::SCN &scn, + int64_t &row_cnt) { + int ret = OB_SUCCESS; + const uint64_t exec_tenant_id = + ObSchemaUtils::get_exec_tenant_id(refresh_ctx_->tenant_id_); + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + common::sqlclient::ObMySQLResult *result = nullptr; + ObSqlString sql; + if (OB_FAIL(sql.assign_fmt( + "SELECT COUNT(*) AS CNT FROM `%.*s`.`%.*s` AS OF SNAPSHOT %ld", + static_cast(db_name.length()), db_name.ptr(), + static_cast(table_name.length()), table_name.ptr(), + scn.get_val_for_tx()))) { + LOG_WARN("fail to assign sql", KR(ret)); + } else if (OB_FAIL(refresh_ctx_->trans_->read(res, refresh_ctx_->tenant_id_, + sql.ptr()))) { + LOG_WARN("execute sql failed", KR(ret), K(sql)); + } else if (OB_ISNULL(result = res.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("result is null", KR(ret)); + } else if (OB_FAIL(result->next())) { + LOG_WARN("fail to get count", KR(ret)); + } else { + EXTRACT_INT_FIELD_MYSQL(*result, "CNT", row_cnt, int64_t); + LOG_DEBUG("############# DBMS_VECTOR ############### get delta_buf_table " + "row cnt ", + K(table_name), K(row_cnt)); + } + } + return ret; +} + +int ObVectorIndexRefresher::get_vector_index_col_names( + const ObTableSchema *table_schema, + bool is_collect_col_id, + ObIArray& col_ids, + 
ObSqlString &col_names) { + int ret = OB_SUCCESS; + ObArray col_name_array; + if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table schema is null", KR(ret)); + } else if (is_collect_col_id && col_ids.count() != 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("collect col id while col id array not empty", K(ret), K(col_ids), KPC(table_schema)); + } else if (!is_collect_col_id && col_ids.count() == 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("not collect col id while col id array empty", K(ret), K(col_ids), KPC(table_schema)); + } else if (!is_collect_col_id) { // not collect id, index id table + // first get scn column + for (ObTableSchema::const_column_iterator iter = + table_schema->column_begin(); + OB_SUCC(ret) && iter != table_schema->column_end(); ++iter) { + const ObColumnSchemaV2 *column_schema = *iter; + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Column schema is NULL", K(ret)); + } else if (column_schema->get_column_name_str().prefix_match(OB_VEC_SCN_COLUMN_NAME_PREFIX)) { + if (OB_FAIL(col_name_array.push_back(column_schema->get_column_name_str()))) { + LOG_WARN("fail to push back col name", K(ret), KPC(column_schema)); + } + } + } + // and than get col id cols + for (int64_t i = 0; i < col_ids.count() && OB_SUCC(ret); i++) { + const ObColumnSchemaV2 *column_schema = table_schema->get_column_schema(col_ids.at(i)); + if (OB_ISNULL(column_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Column schema is NULL", K(ret)); + } else if (OB_FAIL(col_name_array.push_back(column_schema->get_column_name_str()))) { + LOG_WARN("fail to push back col name", K(ret), KPC(column_schema)); + } + } + } else { // collect col id, for delta buffer table, get type,vid,part key cols + for (ObTableSchema::const_column_iterator iter = + table_schema->column_begin(); + OB_SUCC(ret) && iter != table_schema->column_end(); ++iter) { + const ObColumnSchemaV2 *column_schema = *iter; + if (OB_ISNULL(column_schema)) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("Column schema is NULL", K(ret)); + } else if (column_schema->get_column_name_str().prefix_match(OB_VEC_SCN_COLUMN_NAME_PREFIX)) { + // do nothing + } else if (column_schema->get_column_name_str().prefix_match(OB_VEC_VECTOR_COLUMN_NAME_PREFIX)) { + // do nothing + } else if (OB_FAIL(col_ids.push_back(column_schema->get_column_id()))) { + LOG_WARN("fail to push back col id", K(ret), KPC(column_schema)); + } else if (OB_FAIL(col_name_array.push_back(column_schema->get_column_name_str()))) { + LOG_WARN("fail to push back col name", K(ret), KPC(column_schema)); + } + } + if (OB_FAIL(ret)) { + } else if (col_name_array.count() < 2) { // at least type vid col + ret = OB_ERR_UNEXPECTED; + LOG_WARN("column array count is not expected", KR(ret), K(col_name_array)); + } + } + for (int64_t i = 0; i < col_name_array.count() && OB_SUCC(ret); i++) { + bool last_col = (i == (col_name_array.count() - 1)); + ObString &cur_col_name = col_name_array.at(i); + if (last_col && OB_FAIL(col_names.append_fmt("%.*s", static_cast(cur_col_name.length()), cur_col_name.ptr()))) { + LOG_WARN("fail to append str", KR(ret), K(cur_col_name)); + } else if (!last_col && OB_FAIL(col_names.append_fmt("%.*s, ", static_cast(cur_col_name.length()), cur_col_name.ptr()))) { + LOG_WARN("fail to append str", KR(ret), K(cur_col_name)); + } + } + return ret; +} + +int ObVectorIndexRefresher::lock_delta_buf_table_for_refresh() { + int ret = OB_SUCCESS; + const uint64_t tenant_id = refresh_ctx_->tenant_id_; + const uint64_t delta_buf_tb_id = refresh_ctx_->delta_buf_tb_id_; + int64_t retries = 0; + CK(OB_NOT_NULL(refresh_ctx_->trans_)); + while (OB_SUCC(ret) && OB_SUCC(ctx_->check_status())) { + if (OB_FAIL(ObVectorIndexRefresher::lock_delta_buf_tb( + *(refresh_ctx_->trans_), tenant_id, delta_buf_tb_id, true))) { + if (OB_UNLIKELY(OB_TRY_LOCK_ROW_CONFLICT != ret)) { + LOG_WARN("fail to lock delta_buf_table for refresh", KR(ret), + K(tenant_id), K(delta_buf_tb_id)); + } else { + ret = 
OB_SUCCESS; + ++retries; + if (retries % 10 == 0) { + LOG_WARN("retry too many times", K(retries), K(tenant_id), + K(delta_buf_tb_id)); + } + ob_usleep(100LL * 1000); + } + } else { + break; + } + } + return ret; +} + +int ObVectorIndexRefresher::do_refresh() { + int ret = OB_SUCCESS; + const uint64_t tenant_id = refresh_ctx_->tenant_id_; + ObVectorRefreshIdxTransaction &trans = *(refresh_ctx_->trans_); + ObSQLSessionInfo *session_info = nullptr; + ObSchemaGetterGuard schema_guard; + const ObTableSchema *delta_table_schema = nullptr; + const ObTableSchema *index_id_tb_schema = nullptr; + const ObDatabaseSchema *db_schema = nullptr; + int64_t delta_table_row_cnt = 0; + ObSqlString index_id_tb_col_names; + ObSqlString delta_buf_tb_col_names; + ObTimeoutCtx timeout_ctx; + const int64_t DDL_INNER_SQL_EXECUTE_TIMEOUT = + ObDDLUtil::calc_inner_sql_execute_timeout(); + ObArray col_ids; + if (OB_FAIL(lock_delta_buf_table_for_refresh())) { + LOG_WARN("fail to lock delta_buf_table for refresh", KR(ret)); + } else if (OB_ISNULL(session_info = ctx_->get_my_session())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null session info", KR(ret), KPC(ctx_)); + } else if (OB_ISNULL(GCTX.schema_service_)) { + ret = OB_ERR_SYS; + LOG_WARN("schema service is null", KR(ret)); + } else if (OB_FAIL(GCTX.schema_service_->get_tenant_schema_guard( + tenant_id, schema_guard))) { + LOG_WARN("fail to get tenant schema guard", KR(ret), K(tenant_id)); + } else if (OB_FAIL( + ObVectorIndexRefresher::get_current_scn(refresh_ctx_->scn_))) { + LOG_WARN("fail to get current scn", KR(ret)); + } + // get delta_buf_table row count + else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, + refresh_ctx_->delta_buf_tb_id_, + delta_table_schema))) { + LOG_WARN("fail to get delta buf table schema", KR(ret), K(tenant_id), + K(refresh_ctx_->delta_buf_tb_id_)); + } else if (OB_FAIL(schema_guard.get_table_schema( + tenant_id, refresh_ctx_->index_id_tb_id_, + index_id_tb_schema))) { + LOG_WARN("fail to 
get index id table schema", KR(ret), K(tenant_id), + K(refresh_ctx_->index_id_tb_id_)); + } else if (OB_ISNULL(delta_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("delta_buf_table not exist", KR(ret), K(tenant_id), + K(refresh_ctx_->delta_buf_tb_id_)); + } else if (OB_ISNULL(index_id_tb_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("index_id_table not exist", KR(ret), K(tenant_id), + K(refresh_ctx_->index_id_tb_id_)); + } else if (OB_FAIL(schema_guard.get_database_schema( + tenant_id, delta_table_schema->get_database_id(), + db_schema))) { + LOG_WARN("fail to get db schema", KR(ret), K(tenant_id), + K(delta_table_schema->get_database_id())); + } else if (OB_ISNULL(db_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("database not exist", KR(ret)); + } else if (OB_FAIL(get_table_row_count( + db_schema->get_database_name_str(), + delta_table_schema->get_table_name_str(), refresh_ctx_->scn_, + delta_table_row_cnt))) { + LOG_WARN("fail to get delta_buf_table row count", KR(ret), + K(delta_table_schema->get_table_name_str())); + } else if (delta_table_row_cnt < refresh_ctx_->refresh_threshold_) { + // refreshing is not triggered. + } +#ifndef DBMS_VECTOR_MOCK_TEST + else if (OB_FAIL(get_vector_index_col_names(delta_table_schema, + true, + col_ids, + delta_buf_tb_col_names))) { + LOG_WARN("fail to get vid & type col names", KR(ret), + K(delta_table_schema->get_table_name_str())); + } else if (OB_FAIL(get_vector_index_col_names(index_id_tb_schema, + false, + col_ids, + index_id_tb_col_names))) { + LOG_WARN("fail to get vid & type col names", KR(ret), + K(index_id_tb_schema->get_table_name_str())); + } +#endif + else if (OB_FAIL( + timeout_ctx.set_trx_timeout_us(DDL_INNER_SQL_EXECUTE_TIMEOUT))) { + LOG_WARN("set trx timeout failed", K(ret)); + } else if (OB_FAIL(timeout_ctx.set_timeout(DDL_INNER_SQL_EXECUTE_TIMEOUT))) { + LOG_WARN("set timeout failed", K(ret)); + } else { + // do refresh + if (OB_SUCC(ret)) { + int64_t affected_rows = 0; + // 1. 
insert into index_id_table select ... from delta_buf_table + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + common::sqlclient::ObMySQLResult *result = nullptr; + ObSqlString insert_sel_sql; +#ifdef DBMS_VECTOR_MOCK_TEST + if (OB_FAIL(insert_sel_sql.append_fmt( + "INSERT INTO `%.*s`.`%.*s` SELECT * FROM `%.*s`.`%.*s` WHERE " + "ora_rowscn <= %lu", + static_cast(db_schema->get_database_name_str().length()), + db_schema->get_database_name_str().ptr(), + static_cast( + index_id_tb_schema->get_table_name_str().length()), + index_id_tb_schema->get_table_name_str().ptr(), + static_cast(db_schema->get_database_name_str().length()), + db_schema->get_database_name_str().ptr(), + static_cast( + delta_table_schema->get_table_name_str().length()), + delta_table_schema->get_table_name_str().ptr(), + refresh_ctx_->scn_.get_val_for_sql()))) +#else + if (OB_FAIL(insert_sel_sql.append_fmt( + "INSERT INTO `%.*s`.`%.*s` (%.*s) SELECT ora_rowscn, %.*s FROM " + "`%.*s`.`%.*s` WHERE ora_rowscn <= %lu", + static_cast(db_schema->get_database_name_str().length()), + db_schema->get_database_name_str().ptr(), + static_cast( + index_id_tb_schema->get_table_name_str().length()), + index_id_tb_schema->get_table_name_str().ptr(), + static_cast(index_id_tb_col_names.length()), + index_id_tb_col_names.ptr(), + static_cast(delta_buf_tb_col_names.length()), + delta_buf_tb_col_names.ptr(), + static_cast(db_schema->get_database_name_str().length()), + db_schema->get_database_name_str().ptr(), + static_cast( + delta_table_schema->get_table_name_str().length()), + delta_table_schema->get_table_name_str().ptr(), + refresh_ctx_->scn_.get_val_for_sql()))) +#endif + { + LOG_WARN("fail to assign sql", KR(ret)); + } else if (OB_FAIL(refresh_ctx_->trans_->write( + tenant_id, insert_sel_sql.ptr(), affected_rows))) { + LOG_WARN("fail to execute insert into select sql", KR(ret), + K(tenant_id), K(insert_sel_sql)); + } + } + } + if (OB_SUCC(ret)) { + int64_t affected_rows = 0; + // 2. 
delete from delta_buf_table + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + common::sqlclient::ObMySQLResult *result = nullptr; + ObSqlString delete_sql; + if (OB_FAIL(delete_sql.append_fmt( + "DELETE FROM `%.*s`.`%.*s` WHERE ora_rowscn <= %lu", + static_cast(db_schema->get_database_name_str().length()), + db_schema->get_database_name_str().ptr(), + static_cast( + delta_table_schema->get_table_name_str().length()), + delta_table_schema->get_table_name_str().ptr(), + refresh_ctx_->scn_.get_val_for_sql()))) { + LOG_WARN("fail to assign sql", KR(ret)); + } else if (OB_FAIL(refresh_ctx_->trans_->write( + tenant_id, delete_sql.ptr(), affected_rows))) { + LOG_WARN("fail to execute insert into select sql", KR(ret), + K(tenant_id), K(delete_sql)); + } + } + } + } + return ret; +} + +int ObVectorIndexRefresher::do_rebuild() { + int ret = OB_SUCCESS; + const uint64_t tenant_id = refresh_ctx_->tenant_id_; + ObVectorRefreshIdxTransaction &trans = *(refresh_ctx_->trans_); + ObSQLSessionInfo *session_info = nullptr; + ObSchemaGetterGuard schema_guard; + const ObTableSchema *base_table_schema = nullptr; + const ObTableSchema *delta_table_schema = nullptr; + const ObTableSchema *index_id_tb_schema = nullptr; + const ObDatabaseSchema *db_schema = nullptr; + int64_t base_table_row_cnt = 0; + int64_t delta_table_row_cnt = 0; + int64_t index_id_table_row_cnt = 0; + bool triggered = true; + // refresh_ctx_->delta_rate_threshold_ = 0; // yjl, for test + if (OB_FAIL(lock_delta_buf_table_for_refresh())) { + LOG_WARN("fail to lock delta_buf_table for refresh", KR(ret)); + } else if (OB_ISNULL(session_info = ctx_->get_my_session())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null session info", KR(ret), KPC(ctx_)); + } else if (OB_ISNULL(GCTX.schema_service_)) { + ret = OB_ERR_SYS; + LOG_WARN("schema service is null", KR(ret)); + } else if (OB_FAIL(GCTX.schema_service_->get_tenant_schema_guard( + tenant_id, schema_guard))) { + LOG_WARN("fail to get tenant schema guard", KR(ret), 
K(tenant_id)); + } else if (OB_FAIL( + ObVectorIndexRefresher::get_current_scn(refresh_ctx_->scn_))) { + LOG_WARN("fail to get current scn", KR(ret)); + } + // 1. get base_table row count + // 2. get delta_buf_table row count + // 3. get index_id_table row count + else if (OB_FAIL(schema_guard.get_table_schema( + tenant_id, refresh_ctx_->base_tb_id_, base_table_schema))) { + LOG_WARN("fail to get base table schema", KR(ret), K(tenant_id), + K(refresh_ctx_->base_tb_id_)); + } else if (OB_FAIL(schema_guard.get_table_schema( + tenant_id, refresh_ctx_->delta_buf_tb_id_, + delta_table_schema))) { + LOG_WARN("fail to get delta buf table schema", KR(ret), K(tenant_id), + K(refresh_ctx_->delta_buf_tb_id_)); + } else if (OB_FAIL(schema_guard.get_table_schema( + tenant_id, refresh_ctx_->index_id_tb_id_, + index_id_tb_schema))) { + LOG_WARN("fail to get index id table schema", KR(ret), K(tenant_id), + K(refresh_ctx_->index_id_tb_id_)); + } else if (OB_ISNULL(base_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("base_table not exist", KR(ret), K(tenant_id), + K(refresh_ctx_->base_tb_id_)); + } else if (OB_ISNULL(delta_table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("delta_buf_table not exist", KR(ret), K(tenant_id), + K(refresh_ctx_->delta_buf_tb_id_)); + } else if (OB_ISNULL(index_id_tb_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("index_id_table not exist", KR(ret), K(tenant_id), + K(refresh_ctx_->index_id_tb_id_)); + } else if (OB_FAIL(schema_guard.get_database_schema( + tenant_id, delta_table_schema->get_database_id(), + db_schema))) { + LOG_WARN("fail to get db schema", KR(ret), K(tenant_id), + K(delta_table_schema->get_database_id())); + } else if (OB_ISNULL(db_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("database not exist", KR(ret)); + } else if (OB_UNLIKELY(0 == refresh_ctx_->delta_rate_threshold_)) { + // do nothing + } else if (OB_FAIL( + get_table_row_count(db_schema->get_database_name_str(), + base_table_schema->get_table_name_str(), + 
refresh_ctx_->scn_, base_table_row_cnt))) { + LOG_WARN("fail to get base table row count", KR(ret), + K(base_table_schema->get_table_name_str())); + } else if (OB_FAIL(get_table_row_count( + db_schema->get_database_name_str(), + delta_table_schema->get_table_name_str(), refresh_ctx_->scn_, + delta_table_row_cnt))) { + LOG_WARN("fail to get delta_buf_table row count", KR(ret), + K(delta_table_schema->get_table_name_str())); + } else if (OB_FAIL(get_table_row_count( + db_schema->get_database_name_str(), + index_id_tb_schema->get_table_name_str(), refresh_ctx_->scn_, + index_id_table_row_cnt))) { + LOG_WARN("fail to get index_id_table row count", KR(ret), + K(index_id_tb_schema->get_table_name_str())); + } else if (0 != base_table_row_cnt && + (index_id_table_row_cnt + delta_table_row_cnt) * 1.0 / + base_table_row_cnt < + refresh_ctx_->delta_rate_threshold_) { + // rebuilding is not triggered. + triggered = false; + LOG_WARN("no need to start rebuild", K(base_table_row_cnt)); + } + + if (OB_SUCC(ret) && triggered) { + LOG_INFO("start to rebuild vec index"); + const int64_t DEFAULT_TIMEOUT_US = GCONF.internal_sql_execute_timeout; + ObTimeoutCtx timeout_ctx; + ObAddr rs_addr; + obrpc::ObCommonRpcProxy *common_rpc_proxy = GCTX.rs_rpc_proxy_; + ObRebuildIndexArg rebuild_index_arg; + obrpc::ObAlterTableRes rebuild_index_res; + const bool is_support_cancel = true; + rebuild_index_arg.tenant_id_ = tenant_id; + rebuild_index_arg.exec_tenant_id_ = tenant_id; + rebuild_index_arg.session_id_ = session_info->get_sessid(); + rebuild_index_arg.database_name_ = db_schema->get_database_name_str(); + rebuild_index_arg.table_name_ = base_table_schema->get_table_name_str(); + rebuild_index_arg.index_name_ = delta_table_schema->get_table_name_str(); + rebuild_index_arg.index_table_id_ = delta_table_schema->get_table_id(); + rebuild_index_arg.index_action_type_ = obrpc::ObIndexArg::ADD_INDEX; + rebuild_index_arg.parallelism_ = refresh_ctx_->idx_parallel_creation_; + if 
(OB_FAIL(ObShareUtil::set_default_timeout_ctx(timeout_ctx, DEFAULT_TIMEOUT_US))) { + LOG_WARN("fail to set default timeout ctx", KR(ret)); + } else if (OB_FAIL(GCTX.rs_mgr_->get_master_root_server(rs_addr))) { + LOG_WARN("fail to rootservice address", KR(ret)); + } else if (OB_ISNULL(common_rpc_proxy)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected common_rpc_proxy nullptr", K(ret)); + } else if (OB_FAIL(common_rpc_proxy->to(rs_addr).rebuild_vec_index(rebuild_index_arg, rebuild_index_res))) { + LOG_WARN("failed to post backup ls data res", K(ret), K(rebuild_index_arg)); + } else { + LOG_INFO("succ to send rebuild vector index rpc", K(rs_addr), K(refresh_ctx_)); + } + if (OB_SUCC(ret)) { + if (OB_FAIL(ObDDLExecutorUtil::wait_ddl_finish(rebuild_index_arg.tenant_id_, + rebuild_index_res.task_id_, + false/*do not retry at executor*/, + session_info, + common_rpc_proxy, + is_support_cancel))) { + LOG_WARN("fail wait rebuild vec index finish", K(ret)); + } else { + LOG_INFO("succ to wait rebuild vec index", K(ret), K(rebuild_index_res.task_id_), K(rebuild_index_arg)); + } + } + } + return ret; +} + +} // namespace storage +} // namespace oceanbase \ No newline at end of file diff --git a/src/storage/vector_index/ob_vector_index_refresh.h b/src/storage/vector_index/ob_vector_index_refresh.h new file mode 100644 index 0000000000..6f97a3e12d --- /dev/null +++ b/src/storage/vector_index/ob_vector_index_refresh.h @@ -0,0 +1,97 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#pragma once + +#include "storage/vector_index/ob_vector_refresh_idx_transaction.h" + +namespace oceanbase { +namespace sql { +class ObExecContext; +} + +namespace storage { + +struct ObVectorRefreshIndexCtx { +public: + ObVectorRefreshIndexCtx() + : allocator_("VecRefCtx"), tenant_id_(OB_INVALID_TENANT_ID), + base_tb_id_(OB_INVALID_ID), delta_buf_tb_id_(OB_INVALID_ID), + index_id_tb_id_(OB_INVALID_ID), trans_(nullptr), + refresh_method_(share::schema::ObVectorRefreshMethod::MAX) {} + bool is_valid() const { + return OB_INVALID_TENANT_ID != tenant_id_ && + OB_INVALID_ID != delta_buf_tb_id_ && OB_INVALID_ID != base_tb_id_ && + OB_INVALID_ID != index_id_tb_id_ && OB_NOT_NULL(trans_) && + share::schema::ObVectorRefreshMethod::MAX != refresh_method_; + } + void reuse() { + trans_ = nullptr; + allocator_.reuse(); + } + TO_STRING_KV(K_(tenant_id), K_(base_tb_id), K_(delta_buf_tb_id), + K_(index_id_tb_id), K_(refresh_method), K_(delta_rate_threshold), + K_(refresh_threshold)); + +public: + ObArenaAllocator allocator_; + uint64_t tenant_id_; + uint64_t base_tb_id_; + uint64_t delta_buf_tb_id_; + uint64_t index_id_tb_id_; + ObVectorRefreshIdxTransaction *trans_; + share::schema::ObVectorRefreshMethod refresh_method_; + share::schema::ObVectorIndexOrganization idx_organization_; + share::schema::ObVetcorIndexDistanceMetric idx_distance_metric_; + ObString idx_parameters_; + int64_t idx_parallel_creation_; + share::SCN scn_; + + double delta_rate_threshold_; + int64_t refresh_threshold_; +}; + +class ObVectorIndexRefresher { +public: + ObVectorIndexRefresher(); + ~ObVectorIndexRefresher(); + DISABLE_COPY_ASSIGN(ObVectorIndexRefresher); + + int init(sql::ObExecContext &ctx, ObVectorRefreshIndexCtx &refresh_ctx); + int refresh(); + + TO_STRING_KV(KP_(ctx), KP_(refresh_ctx)); + +private: + static int get_current_scn(share::SCN ¤t_scn); + static int lock_delta_buf_tb(ObVectorRefreshIdxTransaction &trans, + const uint64_t tenant_id, + const uint64_t delta_buf_tb_id, + 
const bool try_lock = false); + int get_table_row_count(const ObString &db_name, const ObString &table_name, + const share::SCN &scn, int64_t &row_cnt); + int get_vector_index_col_names(const ObTableSchema *table_schema, + bool is_collect_col_id, + ObIArray& col_ids, + ObSqlString &col_names); + int lock_delta_buf_table_for_refresh(); + int do_refresh(); + int do_rebuild(); + +private: + sql::ObExecContext *ctx_; + ObVectorRefreshIndexCtx *refresh_ctx_; + bool is_inited_; +}; + +} // namespace storage +} // namespace oceanbase \ No newline at end of file diff --git a/src/storage/vector_index/ob_vector_index_sched_job_utils.cpp b/src/storage/vector_index/ob_vector_index_sched_job_utils.cpp new file mode 100644 index 0000000000..75f945ae93 --- /dev/null +++ b/src/storage/vector_index/ob_vector_index_sched_job_utils.cpp @@ -0,0 +1,129 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX STORAGE + +#include "storage/vector_index/ob_vector_index_sched_job_utils.h" +#include "common/object/ob_object.h" +#include "lib/ob_errno.h" +#include "lib/oblog/ob_log_module.h" +#include "lib/string/ob_sql_string.h" +#include "lib/string/ob_string.h" +#include "observer/dbms_scheduler/ob_dbms_sched_job_utils.h" + +namespace oceanbase { +using namespace common; +using namespace dbms_scheduler; +using namespace share; +using namespace share::schema; +using namespace sql; + +namespace storage { + +int ObVectorIndexSchedJobUtils::add_scheduler_job( + common::ObISQLClient &sql_client, const uint64_t tenant_id, + const int64_t job_id, const common::ObString &job_name, + const common::ObString &job_action, const common::ObObj &start_date, + const int64_t repeat_interval_ts, const common::ObString &exec_env) { + int ret = OB_SUCCESS; + if (OB_INVALID_TENANT_ID == tenant_id) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid tenant id", K(ret), K(tenant_id)); + } else { + int64_t start_date_us = start_date.is_null() ? ObTimeUtility::current_time() + : start_date.get_timestamp(); + int64_t end_date_us = 64060560000000000; // 4000-01-01 + HEAP_VAR(ObDBMSSchedJobInfo, job_info) { + job_info.tenant_id_ = tenant_id; + job_info.job_ = job_id; + job_info.job_name_ = job_name; + job_info.job_action_ = job_action; + job_info.lowner_ = ObString("oceanbase"); + job_info.cowner_ = ObString("oceanbase"); + job_info.powner_ = + lib::is_oracle_mode() ? 
ObString("ROOT") : ObString("root@%"); + job_info.job_style_ = ObString("regular"); + job_info.job_type_ = ObString("PLSQL_BLOCK"); + job_info.job_class_ = ObString("DATE_EXPRESSION_JOB_CLASS"); + job_info.what_ = job_action; + job_info.start_date_ = start_date_us; + job_info.end_date_ = end_date_us; + job_info.interval_ = job_info.repeat_interval_; + job_info.repeat_interval_ = ObString(); + job_info.enabled_ = 1; + job_info.auto_drop_ = 0; + job_info.max_run_duration_ = 24 * 60 * 60; // set to 1 day + job_info.interval_ts_ = repeat_interval_ts; + job_info.scheduler_flags_ = + ObDBMSSchedJobInfo::JOB_SCHEDULER_FLAG_DATE_EXPRESSION_JOB_CLASS; + job_info.exec_env_ = exec_env; + + if (OB_FAIL(ObDBMSSchedJobUtils::create_dbms_sched_job( + sql_client, tenant_id, job_id, job_info))) { + LOG_WARN("failed to create dbms scheduler job", KR(ret)); + } + } + } + return ret; +} + +int ObVectorIndexSchedJobUtils::add_vector_index_refresh_job( + common::ObISQLClient &sql_client, const uint64_t tenant_id, + const common::ObString &vec_id_index_tb_name, + const common::ObString &db_name, const common::ObString &table_name, + const common::ObString &index_name, const common::ObString &exec_env) { + int ret = OB_SUCCESS; + int64_t job_id = OB_INVALID_ID; + common::ObObj start_date; + start_date.set_null(); + if (OB_FAIL(ObMViewSchedJobUtils::generate_job_id(tenant_id, job_id))) { + LOG_WARN("failed to generate vector index refresh job id", K(ret)); + } else { + ObSqlString job_action; + if (OB_FAIL(job_action.assign_fmt( + "DBMS_VECTOR.refresh_index('%.*s.%.*s', '%.*s.%.*s', '', %lu, " + "'FAST')", + static_cast(db_name.length()), db_name.ptr(), + static_cast(index_name.length()), index_name.ptr(), + static_cast(db_name.length()), db_name.ptr(), + static_cast(table_name.length()), table_name.ptr(), + ObVectorIndexSchedJobUtils::DEFAULT_REFRESH_TRIGGER_THRESHOLD))) { + LOG_WARN("failed to generate refresh index job id", K(ret)); + } else if 
(OB_FAIL(ObVectorIndexSchedJobUtils::add_scheduler_job( + sql_client, tenant_id, job_id, vec_id_index_tb_name, + job_action.string(), start_date, + ObVectorIndexSchedJobUtils::DEFAULT_REFRESH_INTERVAL_TS, + exec_env))) { + LOG_WARN("failed to add refresh index job", K(ret), K(vec_id_index_tb_name), + K(job_action), K(exec_env)); + } else { + LOG_INFO("succeed to add refresh index job", K(ret), K(vec_id_index_tb_name), + K(job_action), K(exec_env)); + } + } + return ret; +} + +int ObVectorIndexSchedJobUtils::remove_vector_index_refresh_job( + common::ObISQLClient &sql_client, const uint64_t tenant_id, + const common::ObString &vec_id_index_tb_name) { + int ret = OB_SUCCESS; + if (OB_FAIL(ObDBMSSchedJobUtils::remove_dbms_sched_job( + sql_client, tenant_id, vec_id_index_tb_name, true))) { + LOG_WARN("failed to remove vector index refresh job", + KR(ret), K(tenant_id), K(vec_id_index_tb_name)); + } + return ret; +} + +} // namespace storage +} // namespace oceanbase \ No newline at end of file diff --git a/src/storage/vector_index/ob_vector_index_sched_job_utils.h b/src/storage/vector_index/ob_vector_index_sched_job_utils.h new file mode 100644 index 0000000000..4c1807602e --- /dev/null +++ b/src/storage/vector_index/ob_vector_index_sched_job_utils.h @@ -0,0 +1,70 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#pragma once + +#include "lib/ob_define.h" +#include "storage/mview/ob_mview_sched_job_utils.h" + +namespace oceanbase { +namespace share { +namespace schema { +class ObSchemaGetterGuard; +} +} // namespace share +namespace common { +class ObIAllocator; +class ObISQLClient; +class ObObj; +class ObString; +} // namespace common +namespace sql { +class ObResolverParams; +class ObSQLSessionInfo; +} // namespace sql +namespace dbms_scheduler { +class ObDBMSSchedJobInfo; +} +namespace storage { +class ObVectorIndexSchedJobUtils : public ObMViewSchedJobUtils { +public: + static constexpr char *VETCOR_INDEX_REFRESH_JOB_PREFIX = + const_cast("VECTOR_INDEX_REFRESH$J_"); + static constexpr int64_t DEFAULT_REFRESH_INTERVAL_TS = + 10 * 60 * 1000000; // 10min + static constexpr int64_t DEFAULT_REFRESH_TRIGGER_THRESHOLD = 10000; + ObVectorIndexSchedJobUtils() : ObMViewSchedJobUtils() {} + virtual ~ObVectorIndexSchedJobUtils() {} + + static int add_scheduler_job(common::ObISQLClient &sql_client, + const uint64_t tenant_id, const int64_t job_id, + const common::ObString &job_name, + const common::ObString &job_action, + const common::ObObj &start_date, + const int64_t repeat_interval_ts, + const common::ObString &exec_env); + + static int add_vector_index_refresh_job(common::ObISQLClient &sql_client, + const uint64_t tenant_id, + const common::ObString &vec_id_index_tb_name, + const common::ObString &db_name, + const common::ObString &table_name, + const common::ObString &index_name, + const common::ObString &exec_env); + + static int remove_vector_index_refresh_job(common::ObISQLClient &sql_client, + const uint64_t tenant_id, + const common::ObString &vec_id_index_tb_name); +}; + +} // namespace storage +} // namespace oceanbase \ No newline at end of file diff --git a/src/storage/vector_index/ob_vector_refresh_idx_transaction.cpp b/src/storage/vector_index/ob_vector_refresh_idx_transaction.cpp new file mode 100644 index 0000000000..46ddd6ad77 --- /dev/null +++ 
b/src/storage/vector_index/ob_vector_refresh_idx_transaction.cpp
@@ -0,0 +1,241 @@
+/**
+ * Copyright (c) 2023 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX STORAGE
+
+#include "storage/vector_index/ob_vector_refresh_idx_transaction.h"
+#include "observer/ob_inner_sql_connection.h"
+#include "observer/ob_inner_sql_connection_pool.h"
+
+namespace oceanbase
+{
+namespace storage
+{
+using namespace observer;
+using namespace share;
+using namespace sql;
+using namespace common::sqlclient;
+
+/*
+ * ObSessionParamSaved remembers a session's inner/user flag and autocommit
+ * value so they can be put back later. It starts out holding no session.
+ */
+ObVectorRefreshIdxTransaction::ObSessionParamSaved::ObSessionParamSaved()
+  : session_info_(nullptr), is_inner_(false), autocommit_(false)
+{
+}
+/*
+ * Restores the saved session parameters if save() succeeded and restore()
+ * has not been called yet; a restore failure is only logged.
+ */
+ObVectorRefreshIdxTransaction::ObSessionParamSaved::~ObSessionParamSaved()
+{
+  int ret = OB_SUCCESS;
+  if (nullptr != session_info_) {
+    if (OB_FAIL(restore())) {
+      LOG_WARN("fail to restore session param", KR(ret));
+    }
+  }
+}
+/*
+ * Records the session's current inner flag and autocommit value, then
+ * switches the session to an inner session with autocommit off and marks
+ * its ddl info as "dummy ddl for inner visibility".
+ * Fails with OB_ERR_UNEXPECTED if a session is already saved and with
+ * OB_INVALID_ARGUMENT if `session_info` is null.
+ */
+int ObVectorRefreshIdxTransaction::ObSessionParamSaved::save(ObSQLSessionInfo *session_info)
+{
+  int ret = OB_SUCCESS;
+  if (OB_NOT_NULL(session_info_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("already save one session param", KR(ret), KP(session_info_), KP(session_info));
+  } else if (OB_ISNULL(session_info)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid args", KR(ret), KP(session_info));
+  } else {
+    bool autocommit = false;
+    if (OB_FAIL(session_info->get_autocommit(autocommit))) {
+      LOG_WARN("fail to get autocommit", KR(ret));
+    } else {
+      session_info_ = session_info; // from here on the destructor will restore
+      is_inner_ = session_info->is_inner();
+      autocommit_ = autocommit;
+      session_info->set_inner_session();
+      session_info->set_autocommit(false);
+      session_info->get_ddl_info().set_is_dummy_ddl_for_inner_visibility(true);
+    }
+  }
+  return ret;
+}
+/*
+ * Puts back the inner/user flag and autocommit value recorded by save(),
+ * clears the dummy-ddl flag and forgets the session. Does nothing when no
+ * session is saved; as written it always returns OB_SUCCESS.
+ */
+int ObVectorRefreshIdxTransaction::ObSessionParamSaved::restore()
+{
+  int ret = OB_SUCCESS;
+  if (OB_NOT_NULL(session_info_)) {
+    if (is_inner_) {
+      session_info_->set_inner_session();
+    } else {
+      session_info_->set_user_session();
+    }
+    session_info_->set_autocommit(autocommit_);
+    session_info_->get_ddl_info().set_is_dummy_ddl_for_inner_visibility(false);
+    session_info_ = nullptr; // makes a second restore() a no-op
+  }
+  return ret;
+}
+/* Creates a transaction object that is not connected and not in a transaction. */
+ObVectorRefreshIdxTransaction::ObVectorRefreshIdxTransaction()
+  : session_info_(nullptr), start_time_(OB_INVALID_TIMESTAMP), in_trans_(false)
+{
+}
+/*
+ * If a transaction is still open, ends it, passing end() whether
+ * get_errno() reports OB_SUCCESS; a failure of end() is only logged.
+ */
+ObVectorRefreshIdxTransaction::~ObVectorRefreshIdxTransaction()
+{
+  int ret = OB_SUCCESS;
+  if (in_trans_) {
+    if (OB_FAIL(end(OB_SUCCESS == get_errno()))) {
+      LOG_WARN("fail to end", KR(ret));
+    }
+  }
+}
+
+int ObVectorRefreshIdxTransaction::connect(ObSQLSessionInfo *session_info, ObISQLClient *sql_client)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(nullptr != pool_ || nullptr != conn_)) {
+    ret = OB_INNER_STAT_ERROR;
+    LOG_WARN("transaction can only be started once", KR(ret), K(pool_), K(conn_));
+  } else if (OB_UNLIKELY(nullptr == session_info || nullptr == sql_client)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid args", KR(ret), KP(session_info), KP(sql_client));
+  } else {
+    ObInnerSQLConnectionPool *pool = nullptr;
+    ObInnerSQLConnection *conn = nullptr;
+    if (OB_ISNULL(pool = static_cast(sql_client->get_pool()))) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected connection pool", KR(ret));
+    } else if (OB_FAIL(pool->acquire_spi_conn(session_info, conn))) {
+      LOG_WARN("acquire connection failed", KR(ret), K(pool), K(session_info));
+    } else if (OB_ISNULL(conn)) {
+      ret = OB_INNER_STAT_ERROR;
+      LOG_WARN("connection can not be NULL", KR(ret), K_(pool));
+    } else if (!sql_client->is_active()) {
ret = OB_INACTIVE_SQL_CLIENT; + LOG_WARN("inactive sql client", KR(ret)); + int tmp_ret = pool->release(conn, OB_SUCCESS == ret); + if (OB_SUCCESS != tmp_ret) { + LOG_WARN("release connection failed", K(tmp_ret)); + } + conn = nullptr; + } else { + sql_client_ = sql_client; + pool_ = pool; + conn_ = conn; + oracle_mode_ = session_info->is_oracle_compatible(); + } + } + return ret; +} + +int ObVectorRefreshIdxTransaction::start_transaction(uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + ObISQLConnection *conn = nullptr; + if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", KR(ret), K(tenant_id)); + } else if (OB_ISNULL(conn = get_connection())) { + ret = OB_INNER_STAT_ERROR; + LOG_WARN("conn_ is NULL", KR(ret)); + } else { + if (OB_FAIL(conn->start_transaction(tenant_id, false /*with_snapshot*/))) { + LOG_WARN("fail to start transaction", KR(ret), K(tenant_id)); + } + if (OB_SUCCESS == get_errno()) { + set_errno(ret); + } + } + return ret; +} + +int ObVectorRefreshIdxTransaction::end_transaction(const bool commit) +{ + int ret = OB_SUCCESS; + ObISQLConnection *conn = nullptr; + if (OB_ISNULL(conn = get_connection())) { + ret = OB_INNER_STAT_ERROR; + LOG_WARN("conn_ is NULL", KR(ret)); + } else { + if (commit) { + if (OB_FAIL(conn->commit())) { + LOG_WARN("fail to do commit", KR(ret)); + } + } else { + if (OB_FAIL(conn->rollback())) { + LOG_WARN("fail to do rollback", KR(ret)); + } + } + if (OB_SUCCESS == get_errno()) { + set_errno(ret); + } + } + return ret; +} + +int ObVectorRefreshIdxTransaction::start(ObSQLSessionInfo *session_info, ObISQLClient *sql_client) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(in_trans_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("already in trans", KR(ret)); + } else if (OB_UNLIKELY(nullptr == session_info || nullptr == sql_client)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", KR(ret), KP(session_info), KP(sql_client)); + } else if 
(OB_UNLIKELY(session_info->is_in_transaction())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected session is in trans", KR(ret)); + } else if (OB_FAIL(session_param_saved_.save(session_info))) { + LOG_WARN("fail to save session param", KR(ret)); + } else if (OB_FAIL(connect(session_info, sql_client))) { + LOG_WARN("fail to connect", KR(ret)); + } else { + const uint64_t tenant_id = session_info->get_effective_tenant_id(); + start_time_ = ObTimeUtility::current_time(); + if (OB_FAIL(start_transaction(tenant_id))) { + LOG_WARN("failed to start transaction", KR(ret), K(tenant_id)); + } else { + session_info_ = session_info; + in_trans_ = true; + LOG_DEBUG("start transaction success", K(tenant_id)); + } + } + if (OB_FAIL(ret)) { + int tmp_ret = OB_SUCCESS; + close(); + if (OB_TMP_FAIL(session_param_saved_.restore())) { + LOG_WARN("fail to restore session param", KR(tmp_ret)); + } + } + return ret; +} + +int ObVectorRefreshIdxTransaction::end(const bool commit) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + if (in_trans_) { + if (OB_FAIL(end_transaction(commit))) { + LOG_WARN("fail to end transation", KR(ret)); + } else { + LOG_DEBUG("end transaction success", K(commit)); + } + in_trans_ = false; + } + close(); + if (OB_TMP_FAIL(session_param_saved_.restore())) { + LOG_WARN("fail to restore session param", KR(tmp_ret)); + ret = COVER_SUCC(tmp_ret); + } + return ret; +} + +} +} \ No newline at end of file diff --git a/src/storage/vector_index/ob_vector_refresh_idx_transaction.h b/src/storage/vector_index/ob_vector_refresh_idx_transaction.h new file mode 100644 index 0000000000..52a219f453 --- /dev/null +++ b/src/storage/vector_index/ob_vector_refresh_idx_transaction.h @@ -0,0 +1,67 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#pragma once + +#include "lib/mysqlclient/ob_single_connection_proxy.h" +#include "sql/session/ob_sql_session_info.h" + +namespace oceanbase { +namespace storage { + +class ObVectorRefreshIdxTransaction : public common::ObSingleConnectionProxy { + friend class ObVectorRefreshIdxTxnInnerMySQLGuard; + +public: + ObVectorRefreshIdxTransaction(); + virtual ~ObVectorRefreshIdxTransaction(); + DISABLE_COPY_ASSIGN(ObVectorRefreshIdxTransaction); + + int start(sql::ObSQLSessionInfo *session_info, ObISQLClient *sql_client); + int end(const bool commit); + bool is_started() const { return in_trans_; } + sql::ObSQLSessionInfo *get_session_info() const { return session_info_; } + ObCompatibilityMode get_compatibility_mode() const { + return nullptr != session_info_ ? 
session_info_->get_compatibility_mode() + : ObCompatibilityMode::OCEANBASE_MODE; + } + +protected: + int connect(sql::ObSQLSessionInfo *session_info, ObISQLClient *sql_client); + int start_transaction(uint64_t tenant_id); + int end_transaction(const bool commit); + +protected: + class ObSessionParamSaved { + public: + ObSessionParamSaved(); + ~ObSessionParamSaved(); + DISABLE_COPY_ASSIGN(ObSessionParamSaved); + + int save(sql::ObSQLSessionInfo *session_info); + int restore(); + + private: + sql::ObSQLSessionInfo *session_info_; + bool is_inner_; + bool autocommit_; + }; + +private: + sql::ObSQLSessionInfo *session_info_; + ObSessionParamSaved session_param_saved_; + int64_t start_time_; + bool in_trans_; +}; + +} // namespace storage +} // namespace oceanbase \ No newline at end of file diff --git a/tools/deploy/mysql_test/r/mysql/information_schema.result b/tools/deploy/mysql_test/r/mysql/information_schema.result index 6ef41f4cf0..16e9c72bf4 100644 --- a/tools/deploy/mysql_test/r/mysql/information_schema.result +++ b/tools/deploy/mysql_test/r/mysql/information_schema.result @@ -917,6 +917,7 @@ select * from information_schema.tables where table_schema in ('oceanbase', 'mys | def | oceanbase | __all_virtual_transaction_freeze_checkpoint | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_unit | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_user | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | +| def | oceanbase | __all_virtual_vector_index_info | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | 
__all_virtual_virtual_long_ops_status_mysql_sys_agent | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_wr_active_session_history | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_wr_control | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | @@ -2450,6 +2451,7 @@ select * from information_schema.tables where table_schema in ('oceanbase', 'mys | def | oceanbase | __all_virtual_trans_stat | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_unit | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_user | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | +| def | oceanbase | __all_virtual_vector_index_info | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_virtual_long_ops_status_mysql_sys_agent | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_wr_active_session_history | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | | def | oceanbase | __all_virtual_wr_control | SYSTEM TABLE | MEMORY | NULL | DYNAMIC | NULL | NULL | 
NULL | NULL | 0 | NULL | NULL | NULL | NULL | NULL | utf8mb4_general_ci | NULL | NULL | | diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_data_type.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_data_type.result index 1e75119bb7..aa8fb8bed0 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_data_type.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_data_type.result @@ -52,7 +52,7 @@ data_type data_type_str data_type_class 48 GEOMETRY 23 49 UDT 24 50 DECIMAL_INT 25 -51 COLLECTION 26 +51 ARRAY 26 52 MYSQL_DATE 27 53 MYSQL_DATETIME 28 54 ROARINGBITMAP 29 @@ -109,7 +109,7 @@ data_type data_type_str data_type_class 48 GEOMETRY 23 49 UDT 24 50 DECIMAL_INT 25 -51 COLLECTION 26 +51 ARRAY 26 52 MYSQL_DATE 27 53 MYSQL_DATETIME 28 54 ROARINGBITMAP 29 @@ -167,7 +167,7 @@ JSON JSON GEOMETRY GEOMETRY UDT UDT DECIMAL_INT DECIMAL_INT -COLLECTION COLLECTION +ARRAY COLLECTION MYSQL_DATE MYSQL_DATE MYSQL_DATETIME MYSQL_DATETIME ROARINGBITMAP ROARINGBITMAP diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_data_type_class.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_data_type_class.result index ea46369555..eee8373a06 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_data_type_class.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_data_type_class.result @@ -115,7 +115,7 @@ JSON JSON GEOMETRY GEOMETRY UDT UDT DECIMAL_INT DECIMAL_INT -COLLECTION COLLECTION +ARRAY COLLECTION MYSQL_DATE MYSQL_DATE MYSQL_DATETIME MYSQL_DATETIME ROARINGBITMAP ROARINGBITMAP diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result index 9e27a44b30..00fd2c964b 100644 --- 
a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result @@ -188,6 +188,7 @@ ob_query_switch_leader_retry_timeout ob_ratelimit_stat_period ob_ssl_invited_common_names ob_startup_mode +ob_vector_memory_limit_percentage open_cursors optimizer_index_cost_adj opt_tab_stat_cache_priority diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_mysql.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_mysql.result index 714f6df88a..744c718e9f 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_mysql.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_mysql.result @@ -5039,6 +5039,32 @@ IF(count(*) >= 0, 1, 0) "oceanbase.__all_virtual_nic_info runs in single server" IF(count(*) >= 0, 1, 0) 1 +desc oceanbase.__all_virtual_vector_index_info; +Field Type Null Key Default Extra +svr_ip varchar(46) NO NULL +svr_port bigint(20) NO NULL +tenant_id bigint(20) NO NULL +ls_id bigint(20) NO NULL +rowkey_vid_table_id bigint(20) NO NULL +vid_rowkey_table_id bigint(20) NO NULL +inc_index_table_id bigint(20) NO NULL +vbitmap_table_id bigint(20) NO NULL +snapshot_index_table_id bigint(20) NO NULL +data_table_id bigint(20) NO NULL +rowkey_vid_tablet_id bigint(20) NO NULL +vid_rowkey_tablet_id bigint(20) NO NULL +inc_index_tablet_id bigint(20) NO NULL +vbitmap_tablet_id bigint(20) NO NULL +snapshot_index_tablet_id bigint(20) NO NULL +data_tablet_id bigint(20) NO NULL +statistics varchar(2048) NO NULL +sync_info varchar(1024) NO NULL +select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_vector_index_info; +IF(count(*) >= 0, 1, 0) +1 +"oceanbase.__all_virtual_vector_index_info runs in single server" +IF(count(*) >= 0, 1, 0) +1 desc oceanbase.__all_virtual_temp_file; Field Type Null 
Key Default Extra tenant_id bigint(20) NO NULL diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result index 04697e0def..6b9de932db 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result @@ -9751,6 +9751,32 @@ description varchar(2048) YES NULL select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_spatial_reference_systems; IF(count(*) >= 0, 1, 0) 1 +desc oceanbase.__all_virtual_vector_index_info; +Field Type Null Key Default Extra +svr_ip varchar(46) NO NULL +svr_port bigint(20) NO NULL +tenant_id bigint(20) NO NULL +ls_id bigint(20) NO NULL +rowkey_vid_table_id bigint(20) NO NULL +vid_rowkey_table_id bigint(20) NO NULL +inc_index_table_id bigint(20) NO NULL +vbitmap_table_id bigint(20) NO NULL +snapshot_index_table_id bigint(20) NO NULL +data_table_id bigint(20) NO NULL +rowkey_vid_tablet_id bigint(20) NO NULL +vid_rowkey_tablet_id bigint(20) NO NULL +inc_index_tablet_id bigint(20) NO NULL +vbitmap_tablet_id bigint(20) NO NULL +snapshot_index_tablet_id bigint(20) NO NULL +data_tablet_id bigint(20) NO NULL +statistics varchar(2048) NO NULL +sync_info varchar(1024) NO NULL +select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_vector_index_info; +IF(count(*) >= 0, 1, 0) +1 +"oceanbase.__all_virtual_vector_index_info runs in single server" +IF(count(*) >= 0, 1, 0) +1 desc oceanbase.__all_virtual_temp_file; Field Type Null Key Default Extra tenant_id bigint(20) NO NULL diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result index a2b3714ecb..3d944e1583 100644 --- 
a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result @@ -762,6 +762,7 @@ select 0xffffffffff & table_id, table_name, table_type, database_id, part_num fr 12487 __all_virtual_nic_info 2 201001 1 12488 __all_virtual_scheduler_job_run_detail_v2 2 201001 1 12490 __all_virtual_spatial_reference_systems 2 201001 1 +12496 __all_virtual_vector_index_info 2 201001 1 12505 __all_virtual_temp_file 2 201001 1 20001 GV$OB_PLAN_CACHE_STAT 1 201001 1 20002 GV$OB_PLAN_CACHE_PLAN_STAT 1 201001 1 diff --git a/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.cpp b/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.cpp index fe5ac980ef..6ff5f2252c 100644 --- a/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.cpp +++ b/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.cpp @@ -30,7 +30,7 @@ #endif #include "logservice/data_dictionary/ob_data_dict_iterator.h" // ObDataDictIterator #include "share/scn.h" - +#include "share/vector_index/ob_plugin_vector_index_scheduler.h" #include #include @@ -627,6 +627,20 @@ int ObAdminParserLogEntry::parse_dup_table_log_() return ret; } +int ObAdminParserLogEntry::parse_vector_index_log_() +{ + int ret = OB_SUCCESS; + ObVectorIndexTabletIDArray tmp_tablet_id_array_; + ObVectorIndexTableIDArray tmp_table_id_array_; + ObVectorIndexSyncLog vector_index_log(tmp_tablet_id_array_, tmp_table_id_array_); + if (OB_FAIL(vector_index_log.deserialize(buf_, buf_len_, pos_))) { + TRANS_LOG(WARN, "desrialize vector_index_log error", K(ret), KP(buf_), K(buf_len_), K(pos_)); + } else { + fprintf(stdout, " ###: %s\n", to_cstring(vector_index_log)); + } + return ret; +} + int ObAdminParserLogEntry::parse_different_entry_type_(const logservice::ObLogBaseHeader &header) { int ret = OB_SUCCESS; @@ -709,6 +723,10 @@ int ObAdminParserLogEntry::parse_different_entry_type_(const logservice::ObLogBa ret = parse_dup_table_log_(); 
break; } + case oceanbase::logservice::ObLogBaseType::VEC_INDEX_LOG_BASE_TYPE: { + ret = parse_vector_index_log_(); + break; + } default: { fprintf(stdout, " Unknown Base Log Type : %d\n", header.get_log_type()); diff --git a/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.h b/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.h index 1d936887bf..ca1515da5c 100644 --- a/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.h +++ b/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.h @@ -65,6 +65,7 @@ private: int parse_reserved_snapshot_log_(); int parse_medium_log_(); int parse_dup_table_log_(); + int parse_vector_index_log_(); //log type belong to trans_service int parse_trans_redo_log_(transaction::ObTxLogBlock &tx_log_block, diff --git a/unittest/share/CMakeLists.txt b/unittest/share/CMakeLists.txt index 92f7f730f0..3f76934f7d 100644 --- a/unittest/share/CMakeLists.txt +++ b/unittest/share/CMakeLists.txt @@ -69,7 +69,9 @@ ob_unittest(test_geo_func_difference) ob_unittest(test_geo_func_union) ob_unittest(test_geo_func_box) ob_unittest(test_throttling_utils) +ob_unittest(test_array_meta) ob_unittest(test_roaringbitmap) +ob_unittest(test_vector_index_serialize) ob_unittest(test_json_base) ob_unittest(test_json_bin) diff --git a/unittest/share/test_array_meta.cpp b/unittest/share/test_array_meta.cpp new file mode 100644 index 0000000000..791afec16b --- /dev/null +++ b/unittest/share/test_array_meta.cpp @@ -0,0 +1,486 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#define private public +#define protected public +#include "lib/udt/ob_collection_type.h" +#include "lib/udt/ob_array_type.h" +#include "lib/json_type/ob_json_tree.h" +#include "lib/json_type/ob_json_bin.h" +#include "lib/json_type/ob_json_parse.h" +#include "sql/engine/expr/ob_array_cast.h" +#undef private +#undef protected + +#include +#include + +namespace oceanbase { +namespace common { +class TestArrayMeta : public ::testing::Test +{ +public: + TestArrayMeta() + {} + ~TestArrayMeta() + {} + +private: + ObArenaAllocator allocator_; + // disallow copy + DISALLOW_COPY_AND_ASSIGN(TestArrayMeta); +}; + +TEST_F(TestArrayMeta, serialize_deserialize) +{ + ObCollectionBasicType int_type; + int_type.basic_meta_.meta_.set_int32(); + int_type.type_id_ = ObNestedType::OB_BASIC_TYPE; + ObArenaAllocator allocator(ObModIds::TEST); + ObCollectionArrayType arr1_type(allocator); + arr1_type.element_type_ = &int_type; + arr1_type.type_id_ = ObNestedType::OB_ARRAY_TYPE; + ObCollectionArrayType arr2_type(allocator); + arr2_type.element_type_ = &arr1_type; + arr2_type.type_id_ = ObNestedType::OB_ARRAY_TYPE; + ObSqlCollectionInfo type_info(allocator); + type_info.collection_meta_ = &arr2_type; + ObString type_name(strlen("ARRAY(ARRAY(INT))"), "ARRAY(ARRAY(INT))"); + type_info.set_name(type_name); + char buf[1024] = {0}; + int64_t pos = 0; + ASSERT_EQ(OB_SUCCESS, arr1_type.serialize(buf, 1024, pos)); + ObCollectionArrayType arr1_type_res(allocator); + int64_t data_len = pos; + pos = 0; + ASSERT_EQ(OB_SUCCESS, arr1_type_res.deserialize(buf, data_len, pos)); + ASSERT_EQ(arr1_type_res.type_id_, ObNestedType::OB_ARRAY_TYPE); + ObCollectionBasicType *basic_type = reinterpret_cast(arr1_type_res.element_type_); + ASSERT_EQ(basic_type->basic_meta_, int_type.basic_meta_); + + + ObSqlCollectionInfo type_info_parse(allocator); + type_info_parse.set_name(type_name); + ASSERT_EQ(OB_SUCCESS, type_info_parse.parse_type_info()); + ObCollectionArrayType *arr_meta = 
static_cast(type_info_parse.collection_meta_); + ASSERT_EQ(arr_meta->type_id_, ObNestedType::OB_ARRAY_TYPE); + arr_meta = static_cast(arr_meta->element_type_); + ASSERT_EQ(arr_meta->type_id_, ObNestedType::OB_ARRAY_TYPE); + basic_type = static_cast(arr_meta->element_type_); + ASSERT_EQ(basic_type->basic_meta_, int_type.basic_meta_); + + + ObSqlCollectionInfo type1_info_parse(allocator); + ObCollectionBasicType varchar_type; + varchar_type.basic_meta_.meta_.set_varchar(); + varchar_type.basic_meta_.set_length(256); + // set default cs + varchar_type.basic_meta_.meta_.set_collation_type(CS_TYPE_UTF8MB4_BIN); + varchar_type.basic_meta_.meta_.set_collation_level(CS_LEVEL_COERCIBLE); + ObString type1_name(strlen("ARRAY(ARRAY(VARCHAR(256)))"), "ARRAY(ARRAY(VARCHAR(256)))"); + type1_info_parse.set_name(type1_name); + ASSERT_EQ(OB_SUCCESS, type1_info_parse.parse_type_info()); + arr_meta = static_cast(type1_info_parse.collection_meta_); + ASSERT_EQ(arr_meta->type_id_, ObNestedType::OB_ARRAY_TYPE); + arr_meta = static_cast(arr_meta->element_type_); + ASSERT_EQ(arr_meta->type_id_, ObNestedType::OB_ARRAY_TYPE); + basic_type = static_cast(arr_meta->element_type_); + ASSERT_EQ(basic_type->basic_meta_, varchar_type.basic_meta_); +} + +TEST_F(TestArrayMeta, varchar_arra_construct) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObSqlCollectionInfo type1_info_parse(allocator); + ObString type1_name(strlen("ARRAY(VARCHAR(256))"), "ARRAY(VARCHAR(256))"); + type1_info_parse.set_name(type1_name); + ASSERT_EQ(OB_SUCCESS, type1_info_parse.parse_type_info()); + ObIArrayType *arr_var1 = nullptr; + ObCollectionArrayType *arr_type1 = static_cast(type1_info_parse.collection_meta_); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type1, arr_var1)); + // construct array from string ["hello", "world"] + ObString arr1_text("[\"hello\", \"world\"]"); + ObCollectionBasicType *dst_elem_type = static_cast(arr_type1->element_type_); + ASSERT_EQ(OB_SUCCESS, 
sql::ObArrayCastUtils::string_cast(allocator, arr1_text, arr_var1, dst_elem_type)); + ObStringBuffer format_str(&allocator); + ASSERT_EQ(OB_SUCCESS, arr_var1->init()); + ASSERT_EQ(OB_SUCCESS, arr_var1->print(arr_type1, format_str)); + std::cout << "arr_va1: " << format_str.ptr() << std::endl; + + // construct array(arrray(varchar)) + ObSqlCollectionInfo type2_info_parse(allocator); + ObString type2_name(strlen("ARRAY(ARRAY(VARCHAR(256)))"), "ARRAY(ARRAY(VARCHAR(256)))"); + type2_info_parse.set_name(type2_name); + ASSERT_EQ(OB_SUCCESS, type2_info_parse.parse_type_info()); + ObIArrayType *arr_var2 = nullptr; + ObCollectionArrayType *arr_type2 = static_cast(type2_info_parse.collection_meta_); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type2, arr_var2)); + // push ["hello", "world"] to array(arrray(varchar)) + ASSERT_EQ(OB_SUCCESS, static_cast(arr_var2)->push_back(*arr_var1)); + + // construct array from string ["hello", "world", "hi", "what", "are you?"] + ObString arr2_text("[\"hi\", null, \"what\", \"are you?\"]"); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr2_text, arr_var1, dst_elem_type)); + format_str.reset(); + ASSERT_EQ(OB_SUCCESS, arr_var1->init()); + ASSERT_EQ(OB_SUCCESS, arr_var1->print(dst_elem_type, format_str)); + std::cout << "arr_va1: " << format_str.ptr() << std::endl; + // push ["hello", "world", "hi", "what", "are you?"] to array(arrray(varchar)) + ASSERT_EQ(OB_SUCCESS, static_cast(arr_var2)->push_back(*arr_var1)); + ASSERT_EQ(OB_SUCCESS, arr_var2->init()); + format_str.reset(); + ASSERT_EQ(OB_SUCCESS, arr_var2->print(arr_type2->element_type_, format_str)); + std::cout << "arr_va2: " << format_str.ptr() << std::endl; + + char raw_binary[1024] = {0}; + ASSERT_EQ(OB_SUCCESS, arr_var2->get_raw_binary(raw_binary, 1024)); + int32_t raw_len = arr_var2->get_raw_binary_len(); + ObIArrayType *arr_var3 = nullptr; + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type2, 
arr_var3)); + ObString raw_str(raw_len, raw_binary); + ASSERT_EQ(OB_SUCCESS, arr_var3->init(raw_str)); + format_str.reset(); + ASSERT_EQ(OB_SUCCESS, arr_var3->print(arr_type2->element_type_, format_str)); + std::cout << "arr_va3: " << format_str.ptr() << std::endl; + + // construct array(array(array(varchar))) + ObSqlCollectionInfo type3_info_parse(allocator); + ObString type3_name(strlen("ARRAY(ARRAY(ARRAY(VARCHAR(256))))"), "ARRAY(ARRAY(ARRAY(VARCHAR(256))))"); + type3_info_parse.set_name(type3_name); + ASSERT_EQ(OB_SUCCESS, type3_info_parse.parse_type_info()); + ObIArrayType *arr_var4 = nullptr; + ObCollectionArrayType *arr_type3 = static_cast(type3_info_parse.collection_meta_); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type3, arr_var4)); + // push arr_var3 to array(arrray(varchar)) + ASSERT_EQ(OB_SUCCESS, static_cast(arr_var4)->push_back(*arr_var3)); + ASSERT_EQ(OB_SUCCESS, static_cast(arr_var4)->push_back(*arr_var3)); + ASSERT_EQ(OB_SUCCESS, arr_var4->init()); + format_str.reset(); + ASSERT_EQ(OB_SUCCESS, arr_var4->print(arr_type3->element_type_, format_str)); + std::cout << "arr_va4: " << format_str.ptr() << std::endl; + + // arr_var4->at(i) + ObIArrayType *arr_var5 = nullptr; + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type2, arr_var5)); + for (uint32_t i = 0; i < arr_var4->size(); ++i) { + ASSERT_EQ(OB_SUCCESS, arr_var4->at(i, *arr_var5)); + ASSERT_EQ(OB_SUCCESS, arr_var5->init()); + format_str.reset(); + ASSERT_EQ(OB_SUCCESS, arr_var5->print(arr_type2->element_type_, format_str)); + std::cout << "arr_va5: " << i << ": "<< format_str.ptr() << std::endl; + arr_var5->clear(); + } +} + +TEST_F(TestArrayMeta, fixsize_array_construct) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObSqlCollectionInfo type1_info_parse(allocator); + ObString type1_name(strlen("ARRAY(FLOAT)"), "ARRAY(FLOAT)"); + type1_info_parse.set_name(type1_name); + ASSERT_EQ(OB_SUCCESS, type1_info_parse.parse_type_info()); + 
ObIArrayType *arr_var1 = nullptr; + ObCollectionArrayType *arr_type1 = static_cast(type1_info_parse.collection_meta_); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type1, arr_var1)); + // construct array from string [3.14, 1.414, 2.718] + ObString arr1_text("[3.14, 1.414, 2.718]"); + ObCollectionBasicType *dst_elem_type = static_cast(arr_type1->element_type_); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr1_text, arr_var1, dst_elem_type)); + ObStringBuffer format_str(&allocator); + ASSERT_EQ(OB_SUCCESS, arr_var1->init()); + ASSERT_EQ(OB_SUCCESS, arr_var1->print(dst_elem_type, format_str)); + std::cout << "arr_va1: " << format_str.ptr() << std::endl; + + // construct array(arrray(varchar)) + ObSqlCollectionInfo type2_info_parse(allocator); + ObString type2_name(strlen("ARRAY(ARRAY(FLOAT))"), "ARRAY(ARRAY(FLOAT))"); + type2_info_parse.set_name(type2_name); + ASSERT_EQ(OB_SUCCESS, type2_info_parse.parse_type_info()); + ObIArrayType *arr_var2 = nullptr; + ObCollectionArrayType *arr_type2 = static_cast(type2_info_parse.collection_meta_); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type2, arr_var2)); + // push [3.14, 1.414, 2.718] to array(arrray(float)) + ASSERT_EQ(OB_SUCCESS, static_cast(arr_var2)->push_back(*arr_var1)); + + // construct array from string [5, 6.88, null, 8.01] + ObString arr2_text("[5, 6.88, null, 8.01]"); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type1, arr_var1)); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr2_text, arr_var1, dst_elem_type)); + format_str.reset(); + ASSERT_EQ(OB_SUCCESS, arr_var1->init()); + ASSERT_EQ(OB_SUCCESS, arr_var1->print(dst_elem_type, format_str)); + std::cout << "arr_va1: " << format_str.ptr() << std::endl; + // push [5, 6.88, null, 8.01] to array(arrray(float)) + ASSERT_EQ(OB_SUCCESS, static_cast(arr_var2)->push_back(*arr_var1)); + ASSERT_EQ(OB_SUCCESS, arr_var2->init()); + 
format_str.reset(); + ASSERT_EQ(OB_SUCCESS, arr_var2->print(arr_type2->element_type_, format_str)); + std::cout << "arr_va2: " << format_str.ptr() << std::endl; + + char raw_binary[1024] = {0}; + ASSERT_EQ(OB_SUCCESS, arr_var2->get_raw_binary(raw_binary, 1024)); + int32_t raw_len = arr_var2->get_raw_binary_len(); + ObIArrayType *arr_var3 = nullptr; + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type2, arr_var3)); + ObString raw_str(raw_len, raw_binary); + ASSERT_EQ(OB_SUCCESS, arr_var3->init(raw_str)); + format_str.reset(); + ASSERT_EQ(OB_SUCCESS, arr_var3->print(arr_type2->element_type_, format_str)); + std::cout << "arr_va3: " << format_str.ptr() << std::endl; + +} + +TEST_F(TestArrayMeta, type_deduce) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObSqlCollectionInfo type1_info_parse(allocator); + ObString type1_name(strlen("ARRAY(VARCHAR(256))"), "ARRAY(VARCHAR(256))"); + type1_info_parse.set_name(type1_name); + ASSERT_EQ(OB_SUCCESS, type1_info_parse.parse_type_info()); + + ObSqlCollectionInfo type2_info_parse(allocator); + ObString type2_name(strlen("ARRAY(VARCHAR(16))"), "ARRAY(VARCHAR(16))"); + type2_info_parse.set_name(type1_name); + ASSERT_EQ(OB_SUCCESS, type2_info_parse.parse_type_info()); + // different str_len is still has same super type + ASSERT_EQ(true, type2_info_parse.has_same_super_type(type1_info_parse)); + + ObSqlCollectionInfo decimal_array(allocator); + ObString type3_name(strlen("ARRAY(DECIMAL_INT(10,2))"), "ARRAY(DECIMAL_INT(10,2))"); + decimal_array.set_name(type3_name); + ASSERT_EQ(OB_SUCCESS, decimal_array.parse_type_info()); + + ObSqlCollectionInfo float_array(allocator); + ObString type4_name(strlen("ARRAY(FLOAT)"), "ARRAY(FLOAT)"); + float_array.set_name(type4_name); + ASSERT_EQ(OB_SUCCESS, float_array.parse_type_info()); + + // array(float)/array(decimal) has same super type + ASSERT_EQ(true, decimal_array.has_same_super_type(float_array)); +} + +TEST_F(TestArrayMeta, nested_array_parse) +{ + 
ObArenaAllocator allocator(ObModIds::TEST); + ObSqlCollectionInfo type1_info_parse(allocator); + ObString type1_name(strlen("ARRAY(ARRAY(DOUBLE))"), "ARRAY(ARRAY(DOUBLE))"); + type1_info_parse.set_name(type1_name); + ASSERT_EQ(OB_SUCCESS, type1_info_parse.parse_type_info()); + ObIArrayType *arr_var1 = nullptr; + ObCollectionArrayType *arr_type1 = static_cast(type1_info_parse.collection_meta_); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type1, arr_var1)); + // construct nested array(array(double)) from a string of three inner arrays containing nulls + ObString arr1_text("[[3.14159, 95.27, null], [null, 8.878, 912.33], [333, 12.134, null]]"); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr1_text, arr_var1, arr_type1->element_type_)); + ObStringBuffer format_str(&allocator); + ASSERT_EQ(OB_SUCCESS, arr_var1->init()); + ASSERT_EQ(OB_SUCCESS, arr_var1->print(arr_type1->element_type_, format_str)); + std::cout << "arr_va1: " << format_str.ptr() << std::endl; +} + +TEST_F(TestArrayMeta, array_compare) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObSqlCollectionInfo type1_info_parse(allocator); + ObString type1_name(strlen("ARRAY(FLOAT)"), "ARRAY(FLOAT)"); + type1_info_parse.set_name(type1_name); + ASSERT_EQ(OB_SUCCESS, type1_info_parse.parse_type_info()); + ObIArrayType *arr_var1 = nullptr; + ObIArrayType *arr_var2 = nullptr; + ObCollectionArrayType *arr_type1 = static_cast(type1_info_parse.collection_meta_); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type1, arr_var1)); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type1, arr_var2)); + // construct array from string [3.14, 1.414, 2.718] + ObString arr1_text("[3.14, 1.414, 2.718]"); + ObCollectionBasicType *dst_elem_type = static_cast(arr_type1->element_type_); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr1_text, arr_var1, dst_elem_type)); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr1_text, 
arr_var2, dst_elem_type)); + ASSERT_EQ(OB_SUCCESS, arr_var1->init()); + ASSERT_EQ(OB_SUCCESS, arr_var2->init()); + int cmp_ret = 0; + ASSERT_EQ(OB_SUCCESS, arr_var1->compare(*arr_var2, cmp_ret)); + ASSERT_EQ(0, cmp_ret); + ObString arr2_text("[3.14, 2.414, 2.718]"); + arr_var2->clear(); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr2_text, arr_var2, dst_elem_type)); + ASSERT_EQ(OB_SUCCESS, arr_var2->init()); + cmp_ret = 0; + ASSERT_EQ(OB_SUCCESS, arr_var1->compare(*arr_var2, cmp_ret)); + ASSERT_EQ(-1, cmp_ret); + + ObString arr3_text("[3.14, 1.414]"); + arr_var2->clear(); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr3_text, arr_var2, dst_elem_type)); + ASSERT_EQ(OB_SUCCESS, arr_var2->init()); + cmp_ret = 0; + ASSERT_EQ(OB_SUCCESS, arr_var1->compare(*arr_var2, cmp_ret)); + ASSERT_EQ(1, cmp_ret); + +} + +TEST_F(TestArrayMeta, varchar_array_construct) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObSqlCollectionInfo type1_info_parse(allocator); + ObString type1_name(strlen("ARRAY(VARCHAR(256))"), "ARRAY(VARCHAR(256))"); + type1_info_parse.set_name(type1_name); + ASSERT_EQ(OB_SUCCESS, type1_info_parse.parse_type_info()); + ObIArrayType *arr_var1 = nullptr; + ObIArrayType *arr_var2 = nullptr; + ObCollectionArrayType *arr_type1 = static_cast(type1_info_parse.collection_meta_); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type1, arr_var1)); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type1, arr_var2)); + // construct both arrays from the same string ["hello", "hi"] + ObString arr1_text("[\"hello\", \"hi\"]"); + ObCollectionBasicType *dst_elem_type = static_cast(arr_type1->element_type_); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr1_text, arr_var1, dst_elem_type)); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr1_text, arr_var2, dst_elem_type)); + ASSERT_EQ(OB_SUCCESS, arr_var1->init()); + ASSERT_EQ(OB_SUCCESS, 
arr_var2->init()); + int cmp_ret = 0; + ASSERT_EQ(OB_SUCCESS, arr_var1->compare(*arr_var2, cmp_ret)); + ASSERT_EQ(0, cmp_ret); + + ObString arr2_text("[\"hi\", \"hello\"]"); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr2_text, arr_var2, dst_elem_type)); + ASSERT_EQ(OB_SUCCESS, arr_var2->init()); + cmp_ret = 0; + ASSERT_EQ(OB_SUCCESS, arr_var1->compare(*arr_var2, cmp_ret)); + ASSERT_EQ(-1, cmp_ret); + +} + +TEST_F(TestArrayMeta, array_nested_compare) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObSqlCollectionInfo type1_info_parse(allocator); + ObString type1_name(strlen("ARRAY(ARRAY(DOUBLE))"), "ARRAY(ARRAY(DOUBLE))"); + type1_info_parse.set_name(type1_name); + ASSERT_EQ(OB_SUCCESS, type1_info_parse.parse_type_info()); + ObIArrayType *arr_var1 = nullptr; + ObIArrayType *arr_var2 = nullptr; + ObCollectionArrayType *arr_type1 = static_cast(type1_info_parse.collection_meta_); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type1, arr_var1)); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type1, arr_var2)); + // construct both nested array(array(double)) values from the same string, so they compare equal + ObString arr1_text("[[3.14159, 95.27, null], [null, 8.878, 912.33], [333, 12.134, null]]"); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr1_text, arr_var1, arr_type1->element_type_)); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr1_text, arr_var2, arr_type1->element_type_)); + ASSERT_EQ(OB_SUCCESS, arr_var1->init()); + ASSERT_EQ(OB_SUCCESS, arr_var2->init()); + int cmp_ret = 0; + ASSERT_EQ(OB_SUCCESS, arr_var1->compare(*arr_var2, cmp_ret)); + ASSERT_EQ(0, cmp_ret); + + ObString arr2_text("[[3.14159, 95.27, null, null], [8.878, 912.33], [333, 12.134, null]]"); + arr_var2->clear(); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr2_text, arr_var2, arr_type1->element_type_)); + ASSERT_EQ(OB_SUCCESS, arr_var2->init()); + cmp_ret = 0; + ASSERT_EQ(OB_SUCCESS, 
arr_var1->compare(*arr_var2, cmp_ret)); + ASSERT_EQ(-1, cmp_ret); +} + +TEST_F(TestArrayMeta, array_contains) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObSqlCollectionInfo type1_info_parse(allocator); + ObString type1_name(strlen("ARRAY(FLOAT)"), "ARRAY(FLOAT)"); + type1_info_parse.set_name(type1_name); + ASSERT_EQ(OB_SUCCESS, type1_info_parse.parse_type_info()); + ObIArrayType *arr_var1 = nullptr; + ObCollectionArrayType *arr_type1 = static_cast(type1_info_parse.collection_meta_); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type1, arr_var1)); + // construct array from string [3.14, 1.414, 2.718] + ObString arr1_text("[3.14, 1.414, 2.718]"); + ObCollectionBasicType *dst_elem_type = static_cast(arr_type1->element_type_); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr1_text, arr_var1, dst_elem_type)); + ASSERT_EQ(OB_SUCCESS, arr_var1->init()); + bool bret = false; + float val_contain = 1.414; + double val_contain_d = 1.414; + float val_not_contain = 1.4; + ASSERT_EQ(OB_SUCCESS, ObArrayUtil::contains(*arr_var1, val_contain, bret)); + ASSERT_EQ(bret, true); + ASSERT_EQ(OB_SUCCESS, ObArrayUtil::contains(*arr_var1, val_contain_d, bret)); + ASSERT_EQ(bret, false); + ASSERT_EQ(OB_SUCCESS, ObArrayUtil::contains(*arr_var1, val_not_contain, bret)); + ASSERT_EQ(bret, false); +} + +TEST_F(TestArrayMeta, varchar_array_contains) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObSqlCollectionInfo type1_info_parse(allocator); + ObString type1_name(strlen("ARRAY(VARCHAR(256))"), "ARRAY(VARCHAR(256))"); + type1_info_parse.set_name(type1_name); + ASSERT_EQ(OB_SUCCESS, type1_info_parse.parse_type_info()); + ObIArrayType *arr_var1 = nullptr; + ObCollectionArrayType *arr_type1 = static_cast(type1_info_parse.collection_meta_); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type1, arr_var1)); + // construct array from string ["hello", "hi"] + ObString arr1_text("[\"hello\", \"hi\"]"); + 
ObCollectionBasicType *dst_elem_type = static_cast(arr_type1->element_type_); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr1_text, arr_var1, dst_elem_type)); + ASSERT_EQ(OB_SUCCESS, arr_var1->init()); + ObString val_contain("hi"); + bool bret = false; + ASSERT_EQ(OB_SUCCESS, ObArrayUtil::contains(*arr_var1, val_contain, bret)); + ASSERT_EQ(bret, true); + ObString val_not_contain("hell"); + ASSERT_EQ(OB_SUCCESS, ObArrayUtil::contains(*arr_var1, val_not_contain, bret)); + ASSERT_EQ(bret, false); +} + +TEST_F(TestArrayMeta, array_nested_contains) +{ + ObArenaAllocator allocator(ObModIds::TEST); + ObSqlCollectionInfo type1_info_parse(allocator); + ObString type1_name(strlen("ARRAY(ARRAY(DOUBLE))"), "ARRAY(ARRAY(DOUBLE))"); + type1_info_parse.set_name(type1_name); + ASSERT_EQ(OB_SUCCESS, type1_info_parse.parse_type_info()); + ObIArrayType *arr_var1 = nullptr; + ObCollectionArrayType *arr_type1 = static_cast(type1_info_parse.collection_meta_); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type1, arr_var1)); + ObString arr1_text("[[3.14159, 95.27, null, null], [8.878, 912.33], [333, 12.134, null]]"); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr1_text, arr_var1, arr_type1->element_type_)); + ASSERT_EQ(OB_SUCCESS, arr_var1->init()); + + ObSqlCollectionInfo type2_info_parse(allocator); + ObString type2_name(strlen("ARRAY(DOUBLE)"), "ARRAY(DOUBLE)"); + type2_info_parse.set_name(type2_name); + ASSERT_EQ(OB_SUCCESS, type2_info_parse.parse_type_info()); + ObIArrayType *arr_var2 = nullptr; + ObCollectionArrayType *arr_type2 = static_cast(type2_info_parse.collection_meta_); + ASSERT_EQ(OB_SUCCESS, ObArrayTypeObjFactory::construct(allocator, *arr_type2, arr_var2)); + ObString arr2_text("[8.878, 912.33]"); + ObCollectionBasicType *dst_elem_type = static_cast(arr_type2->element_type_); + ASSERT_EQ(OB_SUCCESS, sql::ObArrayCastUtils::string_cast(allocator, arr2_text, arr_var2, dst_elem_type)); + 
ASSERT_EQ(OB_SUCCESS, arr_var2->init()); + + bool bret = false; + ASSERT_EQ(OB_SUCCESS, ObArrayUtil::contains(*arr_var1, *arr_var2, bret)); + ASSERT_EQ(bret, true); +} + +} // namespace common +} // namespace oceanbase + +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + //system("rm -f test_array_meta.log"); + //OB_LOGGER.set_file_name("test_array_meta.log"); + //OB_LOGGER.set_log_level("INFO"); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/share/test_defined_expr_func_by_type.result b/unittest/share/test_defined_expr_func_by_type.result index d635139eea..99308c2865 100644 --- a/unittest/share/test_defined_expr_func_by_type.result +++ b/unittest/share/test_defined_expr_func_by_type.result @@ -51,7 +51,7 @@ : defined : not defined : defined - : defined + : defined : defined : defined : defined @@ -109,7 +109,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -167,7 +167,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -225,7 +225,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -283,7 +283,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -341,7 +341,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -399,7 +399,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -457,7 +457,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -515,7 +515,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -573,7 +573,7 @@ : not defined : not 
defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -631,7 +631,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -689,7 +689,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -747,7 +747,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -805,7 +805,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -863,7 +863,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -921,7 +921,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -979,7 +979,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1037,7 +1037,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1095,7 +1095,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1153,7 +1153,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1211,7 +1211,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1269,7 +1269,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1327,7 +1327,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1385,7 +1385,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined 
: not defined @@ -1443,7 +1443,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1501,7 +1501,7 @@ : defined : not defined : defined - : defined + : defined : defined : defined : defined @@ -1559,7 +1559,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1617,7 +1617,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1675,7 +1675,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1733,7 +1733,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1791,7 +1791,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1849,7 +1849,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1907,7 +1907,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1965,7 +1965,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2023,7 +2023,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2081,7 +2081,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2139,7 +2139,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2197,7 +2197,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2255,7 +2255,7 @@ : not defined : not defined : not defined - : not defined + 
: not defined : not defined : not defined : not defined @@ -2313,7 +2313,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2371,7 +2371,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2429,7 +2429,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2487,7 +2487,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2545,7 +2545,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2603,7 +2603,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2661,7 +2661,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2719,7 +2719,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2777,7 +2777,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2835,7 +2835,7 @@ : defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2893,7 +2893,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2951,68 +2951,68 @@ : not defined : not defined : defined - : not defined + : not defined : not defined : not defined : not defined -/**************** COLLECTION ****************/ +/**************** ARRAY ****************/ - : defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not 
defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined + : defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : defined + : not defined + : not defined + : not defined /**************** MYSQL_DATE ****************/ @@ -3067,7 +3067,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -3125,7 +3125,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -3183,7 +3183,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not 
defined diff --git a/unittest/share/test_defined_func_by_type.result b/unittest/share/test_defined_func_by_type.result index d635139eea..99308c2865 100644 --- a/unittest/share/test_defined_func_by_type.result +++ b/unittest/share/test_defined_func_by_type.result @@ -51,7 +51,7 @@ : defined : not defined : defined - : defined + : defined : defined : defined : defined @@ -109,7 +109,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -167,7 +167,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -225,7 +225,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -283,7 +283,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -341,7 +341,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -399,7 +399,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -457,7 +457,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -515,7 +515,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -573,7 +573,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -631,7 +631,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -689,7 +689,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -747,7 +747,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not 
defined @@ -805,7 +805,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -863,7 +863,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -921,7 +921,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -979,7 +979,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1037,7 +1037,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1095,7 +1095,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1153,7 +1153,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1211,7 +1211,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1269,7 +1269,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1327,7 +1327,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1385,7 +1385,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1443,7 +1443,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1501,7 +1501,7 @@ : defined : not defined : defined - : defined + : defined : defined : defined : defined @@ -1559,7 +1559,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1617,7 +1617,7 @@ : not defined : not defined : not defined - : not defined + : not defined 
: not defined : not defined : not defined @@ -1675,7 +1675,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1733,7 +1733,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1791,7 +1791,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1849,7 +1849,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1907,7 +1907,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -1965,7 +1965,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2023,7 +2023,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2081,7 +2081,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2139,7 +2139,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2197,7 +2197,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2255,7 +2255,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2313,7 +2313,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2371,7 +2371,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2429,7 +2429,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2487,7 +2487,7 @@ : 
not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2545,7 +2545,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2603,7 +2603,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2661,7 +2661,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2719,7 +2719,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2777,7 +2777,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2835,7 +2835,7 @@ : defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2893,7 +2893,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -2951,68 +2951,68 @@ : not defined : not defined : defined - : not defined + : not defined : not defined : not defined : not defined -/**************** COLLECTION ****************/ +/**************** ARRAY ****************/ - : defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined 
- : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined - : not defined + : defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : not defined + : defined + : not defined + : not defined + : not defined /**************** MYSQL_DATE ****************/ @@ -3067,7 +3067,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -3125,7 +3125,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined @@ -3183,7 +3183,7 @@ : not defined : not defined : not defined - : not defined + : not defined : not defined : not defined : not defined diff --git a/unittest/share/test_vector_index_serialize.cpp b/unittest/share/test_vector_index_serialize.cpp new file mode 100644 index 0000000000..29a45852e2 --- /dev/null +++ b/unittest/share/test_vector_index_serialize.cpp @@ -0,0 +1,166 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at:
+ * http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+// Unit tests that drive share::ObOStreamBuf / share::ObIStreamBuf through std::ostream / std::istream using test callbacks.
+#define USING_LOG_PREFIX SHARE
+#include
+#define private public  // expose private members of the headers included below to the tests (undone by the #undef further down)
+#define protected public  // same for protected members
+#include "share/vector_index/ob_plugin_vector_index_serialize.h"
+#include "common/data_buffer.h"
+#undef private
+#undef protected
+// NOTE(review): the bare #include lines in this file have lost their header names in this text -- restore them from the repository.
+#include
+#include
+
+namespace oceanbase {
+namespace common {
+class TestVectorIndexSerialize : public ::testing::Test  // gtest fixture for the TEST_F cases below; it declares no data members
+{
+public:
+  TestVectorIndexSerialize()
+  {}
+  ~TestVectorIndexSerialize()
+  {}
+
+private:
+  // disallow copy
+  DISALLOW_COPY_AND_ASSIGN(TestVectorIndexSerialize);
+};
+// Callback parameter for the output tests: total_size_ accumulates the byte counts that SerializeCallback receives.
+struct TestOStreamCbParam : public share::ObOStreamBuf::CbParam {
+  TestOStreamCbParam()
+    : total_size_(0)
+  {}
+  virtual ~TestOStreamCbParam() {}
+  int64_t total_size_;  // starts at 0; only SerializeCallback adds to it
+};
+// Functor assigned to share::ObOStreamBuf::Callback: returns the code given at construction and, when that code is OB_SUCCESS, logs the chunk and counts its bytes.
+class SerializeCallback {
+public:
+  SerializeCallback(int ret = OB_SUCCESS)
+    : ret_(ret)
+  {}
+  int operator()(const char* data, const int64_t data_size, share::ObOStreamBuf::CbParam &cb_param) {
+    if (OB_SUCCESS == ret_) {  // a failing callback never touches cb_param
+      ObString str(data_size, data);
+      LOG_INFO("output", K(str));
+      TestOStreamCbParam* param = dynamic_cast(&cb_param);  // NOTE(review): cast target type is missing in this text (presumably <TestOStreamCbParam*>); the result is used without a null check
+      param->total_size_ += data_size;
+    }
+    return ret_;
+  }
+private:
+  int ret_;  // value returned by every invocation
+};
+
+static const char* data_str = "unittest: test plugin vector index serialize";  // payload written or read by every test below
+// Writes data_str through a std::ostream backed by an 8-byte ObOStreamBuf and expects the callback to have been given strlen(data_str) bytes in total.
+TEST_F(TestVectorIndexSerialize, serialize)
+{
+  const int MAX_BUF_SIZE = 8LL;  // smaller than strlen(data_str)
+  char data[MAX_BUF_SIZE] = {0};
+  TestOStreamCbParam cb_param;
+  SerializeCallback callback;
+  share::ObOStreamBuf::Callback func = callback;
+  share::ObOStreamBuf streambuf(data, MAX_BUF_SIZE, cb_param, func);
+  std::ostream out(&streambuf);
+  out.write(data_str, strlen(data_str));
+  streambuf.check_finish();  // presumably hands any bytes still buffered to the callback -- confirm against ObOStreamBuf
+  ASSERT_EQ(OB_SUCCESS, streambuf.get_error_code());
+  ASSERT_EQ(strlen(data_str), cb_param.total_size_);
+}
+// Same write with a callback that always returns OB_ERR_UNEXPECTED: the stream buffer must report that code and the byte counter must stay 0.
+TEST_F(TestVectorIndexSerialize, serialize_failed)
+{
+  const int MAX_BUF_SIZE = 8LL;
+  char data[MAX_BUF_SIZE] = {0};
+  TestOStreamCbParam cb_param;
+  SerializeCallback callback(OB_ERR_UNEXPECTED);
+  share::ObOStreamBuf::Callback func = callback;
+  share::ObOStreamBuf streambuf(data, MAX_BUF_SIZE, cb_param, func);
+  std::ostream out(&streambuf);
+  out.write(data_str, strlen(data_str));
+  ASSERT_EQ(OB_ERR_UNEXPECTED, streambuf.get_error_code());
+  ASSERT_EQ(0, cb_param.total_size_);
+}
+// Callback parameter for the input tests: cur_ is the offset into data_str that DeserializeCallback has handed out so far.
+struct TestIStreamCbParam : public share::ObIStreamBuf::CbParam {
+  TestIStreamCbParam()
+    : cur_(0)
+  {}
+  virtual ~TestIStreamCbParam() {}
+  int64_t cur_;  // starts at 0; advanced by read_size on each successful call
+};
+// Functor assigned to share::ObIStreamBuf::Callback: serves data_str in slices of at most data_size bytes, or just returns the failure code given at construction.
+class DeserializeCallback {
+public:
+  DeserializeCallback(int ret = OB_SUCCESS)
+    : ret_(ret)
+  {}
+  int operator()(char*& data, const int64_t data_size, int64_t &read_size, share::ObIStreamBuf::CbParam &cb_param) {
+    if (OB_SUCCESS == ret_) {  // on failure neither data, read_size nor cb_param is touched
+      TestIStreamCbParam* param = dynamic_cast(&cb_param);  // NOTE(review): cast target type is missing in this text (presumably <TestIStreamCbParam*>); the result is used without a null check
+      read_size = MIN(strlen(data_str) - param->cur_, data_size);  // bytes left in data_str, capped at data_size
+      if (read_size) {
+        data = const_cast(data_str + param->cur_);  // repoints the caller's pointer into data_str rather than copying; NOTE(review): cast target type is missing in this text (presumably <char*>)
+        param->cur_ += read_size;
+        ObString str(read_size, data);
+        LOG_INFO("input", K(str));
+      }
+    }
+    return ret_;
+  }
+private:
+  int ret_;  // value returned by every invocation
+};
+// Reads strlen(data_str) bytes through a std::istream backed by an 8-byte ObIStreamBuf into a zero-filled result buffer.
+TEST_F(TestVectorIndexSerialize, deserialize)
+{
+  const int MAX_BUF_SIZE = 8LL;  // smaller than strlen(data_str)
+  char data[MAX_BUF_SIZE] = {0};
+  TestIStreamCbParam cb_param;
+  DeserializeCallback callback;
+  share::ObIStreamBuf::Callback func = callback;
+  share::ObIStreamBuf streambuf(data, MAX_BUF_SIZE, cb_param, func);
+  std::istream in(&streambuf);
+  char result[1024] = {0};
+  in.read(result, strlen(data_str));
+  ASSERT_EQ(OB_SUCCESS, streambuf.get_error_code());
+  ASSERT_EQ(strlen(data_str), strlen(result));  // NOTE(review): compares lengths only; the bytes read are not compared with data_str
+}
+// Same read with a callback that always returns OB_ERR_UNEXPECTED: the stream buffer must report that code and result must stay empty.
+TEST_F(TestVectorIndexSerialize, deserialize_failed)
+{
+  const int MAX_BUF_SIZE = 8LL;
+  char data[MAX_BUF_SIZE] = {0};
+  share::ObIStreamBuf::CbParam cb_param;  // the base type suffices here: the failing callback returns before its dynamic_cast
+  DeserializeCallback callback(OB_ERR_UNEXPECTED);
+  share::ObIStreamBuf::Callback func = callback;
+  share::ObIStreamBuf streambuf(data, MAX_BUF_SIZE, cb_param, func);
+  std::istream in(&streambuf);
+  char result[1024] = {0};
+  in.read(result, strlen(data_str));
+  ASSERT_EQ(OB_ERR_UNEXPECTED, streambuf.get_error_code());
+  ASSERT_EQ(0, strlen(result));
+}
+
+} // namespace common
+} // namespace oceanbase
+// Test entry point: sets the log level to "INFO", initialises GoogleTest and runs every registered case.
+int main(int argc, char** argv)
+{
+  oceanbase::common::ObLogger::get_logger().set_log_level("INFO");
+  OB_LOGGER.set_log_level("INFO");  // NOTE(review): presumably the same logger as the line above, which would make this call redundant -- confirm
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/unittest/sql/parser/print_parser_tree.result b/unittest/sql/parser/print_parser_tree.result index 320ee7c2bc..e500b96ac3 100644 --- a/unittest/sql/parser/print_parser_tree.result +++ b/unittest/sql/parser/print_parser_tree.result @@ -357,7 +357,7 @@ question_mask_size: 0 |--[1],[T_IDENT], str_value_=[d1], value=[9223372036854775807] |--[2],[T_IDENT], str_value_=[c1], value=[9223372036854775807] |--[1],[T_SORT_DESC], str_value_=[], value=[2] - |--[15],[T_COMMA_LIMIT_CLAUSE], str_value_=[], value=[9223372036854775807] + |--[16],[T_COMMA_LIMIT_CLAUSE], str_value_=[], value=[9223372036854775807] |--[0],[T_INT], str_value_=[0], value=[0] |--[1],[T_INT], str_value_=[1], value=[1] @@ -410,7 +410,7 @@ question_mask_size: 0 |--[1],[T_IDENT], str_value_=[t1], value=[9223372036854775807] |--[2],[T_IDENT], str_value_=[c1], value=[9223372036854775807] |--[1],[T_SORT_DESC], str_value_=[], value=[2] - |--[15],[T_COMMA_LIMIT_CLAUSE], str_value_=[], value=[9223372036854775807] + |--[16],[T_COMMA_LIMIT_CLAUSE], str_value_=[], value=[9223372036854775807] |--[0],[T_INT], str_value_=[0], value=[0] |--[1],[T_INT], str_value_=[1], value=[1] @@ -458,7 +458,7 @@ question_mask_size: 0 |--[0],[T_COLUMN_REF], str_value_=[c1], value=[9223372036854775807] |--[2],[T_IDENT], str_value_=[c1], value=[9223372036854775807] |--[1],[T_SORT_DESC], str_value_=[], value=[2] -
|--[15],[T_COMMA_LIMIT_CLAUSE], str_value_=[], value=[9223372036854775807] + |--[16],[T_COMMA_LIMIT_CLAUSE], str_value_=[], value=[9223372036854775807] |--[0],[T_INT], str_value_=[0], value=[0] |--[1],[T_INT], str_value_=[1], value=[1] @@ -3340,7 +3340,7 @@ question_mask_size: 0 |--[0],[T_ORG], str_value_=[], value=[9223372036854775807] |--[0],[T_RELATION_FACTOR], str_value_=[t1], value=[9223372036854775807] |--[1],[T_IDENT], str_value_=[t1], value=[9223372036854775807] - |--[17],[T_HINT_OPTION_LIST], str_value_=[], value=[9223372036854775807] + |--[18],[T_HINT_OPTION_LIST], str_value_=[], value=[9223372036854775807] ************** Case 172 *************** select /*+max_concurrent(10)*/* from t1; @@ -3355,7 +3355,7 @@ question_mask_size: 0 |--[0],[T_ORG], str_value_=[], value=[9223372036854775807] |--[0],[T_RELATION_FACTOR], str_value_=[t1], value=[9223372036854775807] |--[1],[T_IDENT], str_value_=[t1], value=[9223372036854775807] - |--[17],[T_HINT_OPTION_LIST], str_value_=[], value=[9223372036854775807] + |--[18],[T_HINT_OPTION_LIST], str_value_=[], value=[9223372036854775807] |--[0],[T_MAX_CONCURRENT], str_value_=[], value=[9223372036854775807] |--[0],[T_INT], str_value_=[10], value=[10] diff --git a/unittest/sql/parser/test_parser.result b/unittest/sql/parser/test_parser.result index 6a040c949d..49ee0b363f 100644 --- a/unittest/sql/parser/test_parser.result +++ b/unittest/sql/parser/test_parser.result @@ -1735,6 +1735,7 @@ question_mask_size: 0 { } ] }, + { }, { "type":"T_COMMA_LIMIT_CLAUSE", "int_val":9223372036854775807, @@ -2119,6 +2120,7 @@ question_mask_size: 0 { } ] }, + { }, { "type":"T_COMMA_LIMIT_CLAUSE", "int_val":9223372036854775807, @@ -2478,6 +2480,7 @@ question_mask_size: 0 { } ] }, + { }, { "type":"T_COMMA_LIMIT_CLAUSE", "int_val":9223372036854775807, @@ -4113,6 +4116,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -4399,6 +4403,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -4675,6 +4680,7 @@ question_mask_size: 0 { }, { 
}, { }, + { }, { } ] } @@ -6253,6 +6259,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -7312,6 +7319,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -8883,6 +8891,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -8909,6 +8918,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -9154,6 +9164,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -9180,6 +9191,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -9425,6 +9437,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -9451,6 +9464,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -9690,6 +9704,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -9714,6 +9729,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -9959,6 +9975,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -9985,6 +10002,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -10230,6 +10248,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -10256,6 +10275,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -10501,6 +10521,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -10527,6 +10548,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -10766,6 +10788,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -10790,6 +10813,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -11035,6 +11059,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -11061,6 +11086,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -11306,6 +11332,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -11332,6 +11359,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -11577,6 +11605,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -11603,6 +11632,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -11842,6 +11872,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -11866,6 +11897,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -12111,6 +12143,7 @@ 
question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -12137,6 +12170,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -12382,6 +12416,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -12408,6 +12443,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -12653,6 +12689,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -12679,6 +12716,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -12918,6 +12956,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -12942,6 +12981,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -13187,6 +13227,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -13213,6 +13254,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -13458,6 +13500,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -13484,6 +13527,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -13729,6 +13773,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -13755,6 +13800,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -13994,6 +14040,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -14018,6 +14065,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -14263,6 +14311,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -14289,6 +14338,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -14534,6 +14584,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -14560,6 +14611,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -14805,6 +14857,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -14831,6 +14884,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -15070,6 +15124,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -15094,6 +15149,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -15333,6 +15389,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -15357,6 +15414,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -15596,6 +15654,7 @@ question_mask_size: 0 { 
}, { }, { }, + { }, { } ] } @@ -15620,6 +15679,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -15843,6 +15903,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -15867,6 +15928,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -16096,6 +16158,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -16122,6 +16185,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -16307,6 +16371,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] }, @@ -16403,6 +16468,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -16427,6 +16493,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -16612,6 +16679,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] }, @@ -16708,6 +16776,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -16732,6 +16801,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -16933,6 +17003,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -16957,6 +17028,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -17206,6 +17278,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -17230,6 +17303,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -17479,6 +17553,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -17503,6 +17578,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -18018,6 +18094,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -18153,6 +18230,7 @@ question_mask_size: 1 { }, { }, { }, + { }, { } ] } @@ -18288,6 +18366,7 @@ question_mask_size: 1 { }, { }, { }, + { }, { } ] } @@ -18511,6 +18590,7 @@ question_mask_size: 2 { }, { }, { }, + { }, { } ] }, @@ -18645,6 +18725,7 @@ question_mask_size: 2 { }, { }, { }, + { }, { } ] } @@ -18669,6 +18750,7 @@ question_mask_size: 2 { }, { }, { }, + { }, { } ] } @@ -18809,6 +18891,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -18949,6 +19032,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -19089,6 +19173,7 @@ question_mask_size: 0 { }, { }, { }, + { }, 
{ } ] } @@ -19272,6 +19357,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -19344,6 +19430,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -19416,6 +19503,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -20113,6 +20201,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] }, @@ -20246,6 +20335,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] }, @@ -20374,6 +20464,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] }, @@ -20502,6 +20593,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] }, @@ -20630,6 +20722,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] }, @@ -20716,6 +20809,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -20843,6 +20937,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] }, @@ -20929,6 +21024,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -22068,6 +22164,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { "type":"T_HINT_OPTION_LIST", "int_val":9223372036854775807, @@ -22170,6 +22267,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { "type":"T_HINT_OPTION_LIST", "int_val":9223372036854775807, @@ -22375,6 +22473,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -22533,6 +22632,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -22692,6 +22792,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } @@ -22851,6 +22952,7 @@ question_mask_size: 0 { }, { }, { }, + { }, { } ] } diff --git a/unittest/storage/blocksstable/cs_encoding/ob_cs_encoding_test_base.h b/unittest/storage/blocksstable/cs_encoding/ob_cs_encoding_test_base.h index 6c74f38743..60c2d5911a 100644 --- a/unittest/storage/blocksstable/cs_encoding/ob_cs_encoding_test_base.h +++ b/unittest/storage/blocksstable/cs_encoding/ob_cs_encoding_test_base.h @@ -347,7 +347,7 @@ int ObCSEncodingTestBase::check_decode_vector(ObMicroBlockCSDecoder &decoder, } } else if (vector_format == VEC_DISCRETE || vector_format == VEC_UNIFORM) { VecValueTypeClass var_tc_arr[] = {VEC_TC_NUMBER, VEC_TC_EXTEND, 
VEC_TC_STRING, VEC_TC_ENUM_SET_INNER, - VEC_TC_RAW, VEC_TC_ROWID, VEC_TC_LOB, VEC_TC_JSON, VEC_TC_GEO, VEC_TC_UDT, VEC_TC_ROARINGBITMAP}; + VEC_TC_RAW, VEC_TC_ROWID, VEC_TC_LOB, VEC_TC_JSON, VEC_TC_GEO, VEC_TC_UDT, VEC_TC_COLLECTION, VEC_TC_ROARINGBITMAP}; VecValueTypeClass *vec = std::find(std::begin(var_tc_arr), std::end(var_tc_arr), vec_tc); if (vec == std::end(var_tc_arr)) { need_test_column = false; diff --git a/unittest/storage/blocksstable/encoding/test_column_decoder.h b/unittest/storage/blocksstable/encoding/test_column_decoder.h index c8d30a8870..df14e5d7d9 100644 --- a/unittest/storage/blocksstable/encoding/test_column_decoder.h +++ b/unittest/storage/blocksstable/encoding/test_column_decoder.h @@ -2257,7 +2257,7 @@ bool VectorDecodeTestUtil::need_test_vec_with_type( } } else if (vector_format == VEC_DISCRETE) { VecValueTypeClass var_tc_arr[] = {VEC_TC_NUMBER, VEC_TC_EXTEND, VEC_TC_STRING, VEC_TC_ENUM_SET_INNER, - VEC_TC_RAW, VEC_TC_ROWID, VEC_TC_LOB, VEC_TC_JSON, VEC_TC_GEO, VEC_TC_UDT, VEC_TC_ROARINGBITMAP}; + VEC_TC_RAW, VEC_TC_ROWID, VEC_TC_LOB, VEC_TC_JSON, VEC_TC_GEO, VEC_TC_UDT, VEC_TC_COLLECTION, VEC_TC_ROARINGBITMAP}; VecValueTypeClass *vec = std::find(std::begin(var_tc_arr), std::end(var_tc_arr), vec_tc); if (vec == std::end(var_tc_arr)) { need_test_column = false; diff --git a/unittest/storage/blocksstable/encoding/test_raw_decoder.cpp b/unittest/storage/blocksstable/encoding/test_raw_decoder.cpp index 1c2920398e..cf5596a278 100644 --- a/unittest/storage/blocksstable/encoding/test_raw_decoder.cpp +++ b/unittest/storage/blocksstable/encoding/test_raw_decoder.cpp @@ -731,7 +731,7 @@ void TestRawDecoder::test_batch_decode_to_vector( } } else if (vector_format == VEC_DISCRETE) { VecValueTypeClass var_tc_arr[] = {VEC_TC_NUMBER, VEC_TC_EXTEND, VEC_TC_STRING, VEC_TC_ENUM_SET_INNER, - VEC_TC_RAW, VEC_TC_ROWID, VEC_TC_LOB, VEC_TC_JSON, VEC_TC_GEO, VEC_TC_UDT, VEC_TC_ROARINGBITMAP}; + VEC_TC_RAW, VEC_TC_ROWID, VEC_TC_LOB, VEC_TC_JSON, VEC_TC_GEO, 
VEC_TC_UDT, VEC_TC_COLLECTION, VEC_TC_ROARINGBITMAP}; VecValueTypeClass *vec = std::find(std::begin(var_tc_arr), std::end(var_tc_arr), vec_tc); if (vec == std::end(var_tc_arr)) { need_test_column = false;