From a15a0b91935f07f5269f28ba82065ae8a19590ed Mon Sep 17 00:00:00 2001 From: Pxl Date: Thu, 8 Jun 2023 19:36:21 +0800 Subject: [PATCH] [Chore](build) use file(GLOB_RECURSE xxx CONFIGURE_DEPENDS) to replace set cpp (#20461) use file(GLOB_RECURSE xxx CONFIGURE_DEPENDS) to replace set cpp --- .clang-format-ignore | 4 - .licenserc.yaml | 1 - be/src/agent/CMakeLists.txt | 9 +- be/src/common/CMakeLists.txt | 11 +- be/src/exec/CMakeLists.txt | 34 +- be/src/exprs/CMakeLists.txt | 11 +- be/src/http/CMakeLists.txt | 41 +- be/src/io/CMakeLists.txt | 42 +- be/src/olap/CMakeLists.txt | 96 +--- .../rowset/segment_v2/ngram_bloom_filter.cpp | 6 +- be/src/pipeline/CMakeLists.txt | 47 +- be/src/runtime/CMakeLists.txt | 63 +-- be/src/runtime/export_task_mgr.cpp | 230 --------- be/src/runtime/export_task_mgr.h | 81 --- be/src/service/CMakeLists.txt | 12 +- be/src/util/CMakeLists.txt | 87 +--- be/src/util/cityhash102/city.cc | 468 ------------------ be/src/util/cityhash102/city.h | 104 ---- be/src/util/cityhash102/citycrc.h | 48 -- be/src/util/cityhash102/config.h | 125 ----- be/src/vec/CMakeLists.txt | 345 +------------ be/test/CMakeLists.txt | 291 ++--------- be/test/agent/agent_server_test.cpp | 178 ------- be/test/io/fs/remote_file_system_test.cpp | 5 - be/test/olap/memtable_flush_executor_test.cpp | 44 +- be/test/olap/remote_rowset_gc_test.cpp | 3 - be/test/olap/row_cursor_test.cpp | 40 -- be/test/olap/rowset/beta_rowset_test.cpp | 2 +- be/test/runtime/export_task_mgr_test.cpp | 216 -------- be/test/runtime/jsonb_value_test.cpp | 13 +- be/test/runtime/large_int_value_test.cpp | 2 +- ...sc_tbl_builder.cc => desc_tbl_builder.cpp} | 0 be/test/tools/benchmark_tool.cpp | 280 ----------- .../serde/data_type_serde_pb_test.cpp | 8 +- .../data_types/serde/data_type_serde_test.cpp | 8 +- 35 files changed, 88 insertions(+), 2867 deletions(-) delete mode 100644 be/src/runtime/export_task_mgr.cpp delete mode 100644 be/src/runtime/export_task_mgr.h delete mode 100644 be/src/util/cityhash102/city.cc delete mode 100644 be/src/util/cityhash102/city.h delete mode 100644 be/src/util/cityhash102/citycrc.h delete mode 100644 be/src/util/cityhash102/config.h delete mode 100644 be/test/agent/agent_server_test.cpp delete mode 100644 be/test/runtime/export_task_mgr_test.cpp rename be/test/testutil/{desc_tbl_builder.cc => desc_tbl_builder.cpp} (100%) diff --git a/.clang-format-ignore b/.clang-format-ignore index 55ac1e1395..fbbc4cb044 100644 --- a/.clang-format-ignore +++ b/.clang-format-ignore @@ -9,7 +9,3 @@ be/src/util/sse2neon.h be/src/util/mustache/mustache.h be/src/util/mustache/mustache.cc be/src/util/utf8_check.cpp -be/src/util/cityhash102/city.h -be/src/util/cityhash102/city.cc -be/src/util/cityhash102/citycrc.h -be/src/util/cityhash102/config.h diff --git a/.licenserc.yaml b/.licenserc.yaml index d52378d5b5..0a3f0b08e8 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -66,7 +66,6 @@ header: - "be/src/util/sse2neo.h" - "be/src/util/sse2neon.h" - "be/src/util/utf8_check.cpp" - - "be/src/util/cityhash102" - "be/src/pch/*" - "build-support/run_clang_format.py" - "regression-test/data" diff --git a/be/src/agent/CMakeLists.txt b/be/src/agent/CMakeLists.txt index 538b1d4d0a..c680c41d6b 100644 --- a/be/src/agent/CMakeLists.txt +++ b/be/src/agent/CMakeLists.txt @@ -21,14 +21,7 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/agent") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/agent") -set(AGENT_SOURCES - agent_server.cpp - heartbeat_server.cpp - task_worker_pool.cpp - utils.cpp - topic_subscriber.cpp - user_resource_listener.cpp -) +file(GLOB_RECURSE AGENT_SOURCES CONFIGURE_DEPENDS *.cpp) if (OS_MACOSX) list(REMOVE_ITEM AGENT_SOURCES user_resource_listener.cpp) diff --git a/be/src/common/CMakeLists.txt b/be/src/common/CMakeLists.txt index d72c02885d..8cbf3690a7 100644 --- a/be/src/common/CMakeLists.txt +++ b/be/src/common/CMakeLists.txt @@ -18,15 +18,8 @@ # where to put generated libraries set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/common") -add_library(Common STATIC - daemon.cpp - status.cpp - resource_tls.cpp - logconfig.cpp - config.cpp - exception.cpp - version_internal.cpp -) +file(GLOB_RECURSE SRC_FILES CONFIGURE_DEPENDS *.cpp) +add_library(Common STATIC ${SRC_FILES}) pch_reuse(Common) diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index f8590c7ae2..1f9da86abf 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ -21,39 +21,7 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/exec") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/exec") -set(EXEC_FILES - data_sink.cpp - decompressor.cpp - exec_node.cpp - text_converter.cpp - olap_common.cpp - tablet_info.cpp - es/es_scan_reader.cpp - es/es_scroll_query.cpp - es/es_scroll_parser.cpp - schema_scanner.cpp - schema_scanner/schema_tables_scanner.cpp - schema_scanner/schema_dummy_scanner.cpp - schema_scanner/schema_schemata_scanner.cpp - schema_scanner/schema_variables_scanner.cpp - schema_scanner/schema_columns_scanner.cpp - schema_scanner/schema_charsets_scanner.cpp - schema_scanner/schema_collations_scanner.cpp - schema_scanner/schema_helper.cpp - schema_scanner/schema_views_scanner.cpp - schema_scanner/schema_statistics_scanner.cpp - schema_scanner/schema_table_privileges_scanner.cpp - schema_scanner/schema_schema_privileges_scanner.cpp - schema_scanner/schema_user_privileges_scanner.cpp - schema_scanner/schema_files_scanner.cpp - schema_scanner/schema_partitions_scanner.cpp - schema_scanner/schema_rowsets_scanner.cpp - scan_node.cpp - odbc_connector.cpp - table_connector.cpp - schema_scanner.cpp - rowid_fetcher.cpp -) +file(GLOB_RECURSE EXEC_FILES CONFIGURE_DEPENDS *.cpp) if (WITH_LZO) set(EXEC_FILES ${EXEC_FILES} diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt index bda3ba9df0..6442c84a06 100644 --- a/be/src/exprs/CMakeLists.txt +++ b/be/src/exprs/CMakeLists.txt @@ -21,14 +21,7 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/exprs") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/exprs") -add_library(Exprs - block_bloom_filter_avx_impl.cc - block_bloom_filter_impl.cc - runtime_filter.cpp - runtime_filter_rpc.cpp - math_functions.cpp - string_functions.cpp - json_functions.cpp -) +file(GLOB_RECURSE SRC_FILES CONFIGURE_DEPENDS *.cpp *.cc) +add_library(Exprs STATIC ${SRC_FILES}) pch_reuse(Exprs) \ No newline at end of file diff --git a/be/src/http/CMakeLists.txt b/be/src/http/CMakeLists.txt index 87163346f7..92b196f8c7 100644 --- a/be/src/http/CMakeLists.txt +++ b/be/src/http/CMakeLists.txt @@ -21,42 +21,9 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/http") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/http") -add_library(Webserver STATIC - http_headers.cpp - http_method.cpp - http_request.cpp - http_channel.cpp - http_status.cpp - http_parser.cpp - http_handler_with_auth.cpp - web_page_handler.cpp - default_path_handlers.cpp - utils.cpp - ev_http_server.cpp - http_client.cpp - action/download_action.cpp - action/download_binlog_action.cpp - action/pad_rowset_action.cpp - action/health_action.cpp - action/tablet_migration_action.cpp - action/tablets_info_action.cpp - action/tablets_distribution_action.cpp - action/checksum_action.cpp - action/snapshot_action.cpp - action/reload_tablet_action.cpp - action/restore_tablet_action.cpp - action/pprof_actions.cpp - action/metrics_action.cpp - action/stream_load.cpp - action/stream_load_2pc.cpp - action/meta_action.cpp - action/compaction_action.cpp - action/config_action.cpp - action/check_rpc_channel_action.cpp - action/reset_rpc_channel_action.cpp - action/check_tablet_segment_action.cpp - action/version_action.cpp - action/jeprofile_actions.cpp - action/file_cache_action.cpp) +file(GLOB_RECURSE SRC_FILES CONFIGURE_DEPENDS + *.cpp +) +add_library(Webserver STATIC ${SRC_FILES}) pch_reuse(Webserver) diff --git a/be/src/io/CMakeLists.txt b/be/src/io/CMakeLists.txt index 5efcf3cdab..7eba277f8b 100644 --- a/be/src/io/CMakeLists.txt +++ b/be/src/io/CMakeLists.txt @@ -21,46 +21,8 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/io") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/io") -set(IO_FILES - file_factory.cpp - hdfs_builder.cpp - fs/file_reader_options.cpp - fs/file_reader.cpp - fs/file_system.cpp - fs/remote_file_system.cpp - fs/local_file_system.cpp - fs/local_file_reader.cpp - fs/local_file_writer.cpp - fs/s3_file_system.cpp - fs/s3_file_reader.cpp - fs/s3_file_writer.cpp - fs/s3_file_write_bufferpool.cpp - fs/hdfs_file_system.cpp - fs/hdfs_file_reader.cpp - fs/hdfs_file_writer.cpp - fs/broker_file_system.cpp - fs/broker_file_reader.cpp - fs/broker_file_writer.cpp - fs/buffered_reader.cpp - fs/stream_load_pipe.cpp - fs/multi_table_pipe.cpp - fs/err_utils.cpp - fs/fs_utils.cpp - cache/dummy_file_cache.cpp - cache/file_cache.cpp - cache/file_cache_manager.cpp - cache/sub_file_cache.cpp - cache/whole_file_cache.cpp - cache/block/block_file_segment.cpp - cache/block/block_file_cache.cpp - cache/block/block_file_cache_profile.cpp - cache/block/block_file_cache_factory.cpp - cache/block/block_lru_file_cache.cpp - cache/block/cached_remote_file_reader.cpp -) +file(GLOB_RECURSE IO_FILES CONFIGURE_DEPENDS *.cpp) -add_library(IO STATIC - ${IO_FILES} -) +add_library(IO STATIC ${IO_FILES}) pch_reuse(IO) \ No newline at end of file diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index 50ecd5653f..a9122baa48 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -23,100 +23,8 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/olap") add_subdirectory(rowset) -add_library(Olap STATIC - base_compaction.cpp - base_tablet.cpp - binlog_config.cpp - bloom_filter.hpp - block_column_predicate.cpp - cold_data_compaction.cpp - compaction.cpp - compaction_permit_limiter.cpp - cumulative_compaction.cpp - cumulative_compaction_policy.cpp - delete_bitmap_calculator.cpp - delete_handler.cpp - delta_writer.cpp - hll.cpp - inverted_index_parser.cpp - itoken_extractor.cpp - like_column_predicate.cpp - key_coder.cpp - lru_cache.cpp - match_predicate.cpp - memtable.cpp - memtable_flush_executor.cpp - merger.cpp - null_predicate.cpp - olap_meta.cpp - olap_server.cpp - schema_cache.cpp - options.cpp - page_cache.cpp - push_handler.cpp - reader.cpp - row_cursor.cpp - version_graph.cpp - schema.cpp - schema_change.cpp - storage_engine.cpp - data_dir.cpp - short_key_index.cpp - single_replica_compaction.cpp - snapshot_manager.cpp - tablet.cpp - tablet_manager.cpp - tablet_meta.cpp - tablet_meta_manager.cpp - tablet_schema.cpp - tablet_schema_cache.cpp - txn_manager.cpp - types.cpp - utils.cpp - wrapper_field.cpp - primary_key_index.cpp - rowset/segment_v2/bitmap_index_reader.cpp - rowset/segment_v2/bitmap_index_writer.cpp - rowset/segment_v2/inverted_index_reader.cpp - rowset/segment_v2/inverted_index_writer.cpp - rowset/segment_v2/inverted_index_cache.cpp - rowset/segment_v2/inverted_index_desc.cpp - rowset/segment_v2/inverted_index_compound_directory.cpp - rowset/segment_v2/inverted_index_compound_reader.cpp - rowset/segment_v2/inverted_index_compaction.cpp - rowset/segment_v2/bitshuffle_page.cpp - rowset/segment_v2/bitshuffle_wrapper.cpp - rowset/segment_v2/column_reader.cpp - rowset/segment_v2/column_writer.cpp - rowset/segment_v2/encoding_info.cpp - rowset/segment_v2/index_page.cpp - rowset/segment_v2/indexed_column_reader.cpp - rowset/segment_v2/indexed_column_writer.cpp - rowset/segment_v2/ordinal_page_index.cpp - rowset/segment_v2/page_io.cpp - rowset/segment_v2/binary_dict_page.cpp - rowset/segment_v2/binary_prefix_page.cpp - rowset/segment_v2/segment.cpp - rowset/segment_v2/segment_iterator.cpp - rowset/segment_v2/empty_segment_iterator.cpp - rowset/segment_v2/segment_writer.cpp - rowset/segment_v2/block_split_bloom_filter.cpp - rowset/segment_v2/ngram_bloom_filter.cpp - rowset/segment_v2/bloom_filter_index_reader.cpp - rowset/segment_v2/bloom_filter_index_writer.cpp - rowset/segment_v2/bloom_filter.cpp - rowset/segment_v2/zone_map_index.cpp - task/engine_batch_load_task.cpp - task/engine_checksum_task.cpp - task/engine_clone_task.cpp - task/engine_storage_migration_task.cpp - task/engine_publish_version_task.cpp - task/engine_alter_tablet_task.cpp - task/engine_index_change_task.cpp - task/index_builder.cpp - segment_loader.cpp - storage_policy.cpp -) +file(GLOB_RECURSE SRC_FILES CONFIGURE_DEPENDS *.cpp) +add_library(Olap STATIC ${SRC_FILES}) if (NOT USE_MEM_TRACKER) target_compile_options(Olap PRIVATE -Wno-unused-lambda-capture) diff --git a/be/src/olap/rowset/segment_v2/ngram_bloom_filter.cpp b/be/src/olap/rowset/segment_v2/ngram_bloom_filter.cpp index 54f49b8bb6..a3227f8e75 100644 --- a/be/src/olap/rowset/segment_v2/ngram_bloom_filter.cpp +++ b/be/src/olap/rowset/segment_v2/ngram_bloom_filter.cpp @@ -20,8 +20,8 @@ #include #include +#include "gutil/hash/city.h" #include "gutil/strings/substitute.h" -#include "util/cityhash102/city.h" namespace doris { namespace segment_v2 { @@ -54,8 +54,8 @@ Status NGramBloomFilter::init(const char* buf, uint32_t size, HashStrategyPB str } void NGramBloomFilter::add_bytes(const char* data, uint32_t len) { - size_t hash1 = CityHash_v1_0_2::CityHash64WithSeed(data, len, 0); - size_t hash2 = CityHash_v1_0_2::CityHash64WithSeed(data, len, SEED_GEN); + size_t hash1 = util_hash::CityHash64WithSeed(data, len, 0); + size_t hash2 = util_hash::CityHash64WithSeed(data, len, SEED_GEN); for (size_t i = 0; i < HASH_FUNCTIONS; ++i) { size_t pos = (hash1 + i * hash2 + i * i) % (8 * _size); diff --git a/be/src/pipeline/CMakeLists.txt b/be/src/pipeline/CMakeLists.txt index 1b3c2784be..fc69608d7a 100644 --- a/be/src/pipeline/CMakeLists.txt +++ b/be/src/pipeline/CMakeLists.txt @@ -21,51 +21,10 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/pipeline") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/pipeline") -set(PIPELINE_FILES - pipeline.cpp - pipeline_fragment_context.cpp - pipeline_task.cpp - task_queue.cpp - task_scheduler.cpp - exec/operator.cpp - exec/scan_operator.cpp - exec/schema_scan_operator.cpp - exec/datagen_operator.cpp - exec/empty_set_operator.cpp - exec/exchange_source_operator.cpp - exec/exchange_sink_operator.cpp - exec/exchange_sink_buffer.cpp - exec/result_sink_operator.cpp - exec/result_file_sink_operator.cpp - exec/aggregation_sink_operator.cpp - exec/aggregation_source_operator.cpp - exec/hashjoin_build_sink.cpp - exec/hashjoin_probe_operator.cpp - exec/analytic_sink_operator.cpp - exec/analytic_source_operator.cpp - exec/streaming_aggregation_source_operator.cpp - exec/streaming_aggregation_sink_operator.cpp - exec/sort_source_operator.cpp - exec/sort_sink_operator.cpp - exec/repeat_operator.cpp - exec/table_function_operator.cpp - exec/nested_loop_join_build_operator.cpp - exec/nested_loop_join_probe_operator.cpp - exec/set_sink_operator.cpp - exec/set_source_operator.cpp - exec/set_probe_sink_operator.cpp - exec/union_sink_operator.cpp - exec/union_source_operator.cpp - exec/data_queue.cpp - exec/select_operator.cpp - exec/empty_source_operator.cpp - exec/multi_cast_data_streamer.cpp - exec/multi_cast_data_stream_source.cpp) +file(GLOB_RECURSE PIPELINE_FILES CONFIGURE_DEPENDS *.cpp) -if (WITH_MYSQL) - set(PIPELINE_FILES - ${PIPELINE_FILES} - exec/mysql_scan_operator.cpp) +if (NOT WITH_MYSQL) + list(REMOVE_ITEM PIPELINE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/exec/mysql_scan_operator.cpp) endif () add_library(Pipeline STATIC diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index 44054bce73..a0b3b799a7 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -23,67 +23,10 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/runtime") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/runtime") -set(RUNTIME_FILES - broker_mgr.cpp - buffer_control_block.cpp - client_cache.cpp - descriptors.cpp - exec_env.cpp - exec_env_init.cpp - user_function_cache.cpp - plan_fragment_executor.cpp - primitive_type.cpp - result_buffer_mgr.cpp - result_writer.cpp - runtime_state.cpp - runtime_filter_mgr.cpp - runtime_predicate.cpp - jsonb_value.cpp - thread_context.cpp - threadlocal.cc - decimalv2_value.cpp - large_int_value.cpp - struct_value.cpp - collection_value.cpp - map_value.cpp - fragment_mgr.cpp - load_path_mgr.cpp - types.cpp - load_channel_mgr.cpp - load_channel.cpp - tablets_channel.cpp - snapshot_loader.cpp - query_statistics.cpp - message_body_sink.cpp - stream_load/stream_load_context.cpp - stream_load/stream_load_executor.cpp - stream_load/stream_load_recorder.cpp - stream_load/new_load_stream_mgr.cpp - routine_load/data_consumer.cpp - routine_load/data_consumer_group.cpp - routine_load/data_consumer_pool.cpp - routine_load/routine_load_task_executor.cpp - small_file_mgr.cpp - record_batch_queue.cpp - result_queue_mgr.cpp - external_scan_context_mgr.cpp - memory/system_allocator.cpp - memory/chunk_allocator.cpp - memory/mem_tracker_limiter.cpp - memory/mem_tracker.cpp - memory/thread_mem_tracker_mgr.cpp - fold_constant_executor.cpp - cache/result_node.cpp - cache/result_cache.cpp - block_spill_manager.cpp - task_group/task_group.cpp - task_group/task_group_manager.cpp -) +file(GLOB_RECURSE RUNTIME_FILES CONFIGURE_DEPENDS *.cpp *.cc) -if (USE_JEMALLOC AND USE_MEM_TRACKER) - set(RUNTIME_FILES ${RUNTIME_FILES} - memory/jemalloc_hook.cpp - ) +if (NOT USE_JEMALLOC OR NOT USE_MEM_TRACKER) + list(REMOVE_ITEM RUNTIME_FILES ${CMAKE_CURRENT_SOURCE_DIR}/memory/jemalloc_hook.cpp) endif() add_library(Runtime STATIC diff --git a/be/src/runtime/export_task_mgr.cpp b/be/src/runtime/export_task_mgr.cpp deleted file mode 100644 index 5eb2e68fb1..0000000000 --- a/be/src/runtime/export_task_mgr.cpp +++ /dev/null @@ -1,230 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "runtime/export_task_mgr.h" - -#include -#include -#include -#include - -#include "runtime/client_cache.h" -#include "runtime/exec_env.h" -#include "runtime/fragment_mgr.h" -#include "runtime/runtime_state.h" -#include "util/uid_util.h" - -namespace doris { - -#define VLOG_EXPORT VLOG_CRITICAL - -static size_t LRU_MAX_CASH_TASK_NUM = 1000; - -ExportTaskMgr::ExportTaskMgr(ExecEnv* exec_env) - : _exec_env(exec_env), - _success_tasks(LRU_MAX_CASH_TASK_NUM), - _failed_tasks(LRU_MAX_CASH_TASK_NUM) {} - -Status ExportTaskMgr::init() { - return Status::OK(); -} - -Status ExportTaskMgr::start_task(const TExportTaskRequest& request) { - const TUniqueId& id = request.params.params.fragment_instance_id; - std::lock_guard l(_lock); - auto it = _running_tasks.find(id); - if (it != _running_tasks.end()) { - // Already have this task, return what??? - LOG(INFO) << "Duplicated export task(" << id << ")"; - return Status::OK(); - } - - // If already success, we return Status::OK() - // and wait master ask me success information - if (_success_tasks.exists(id)) { - // Already success - LOG(INFO) << "Already successful export task(" << id << ")"; - return Status::OK(); - } - - RETURN_IF_ERROR(_exec_env->fragment_mgr()->exec_plan_fragment( - request.params, std::bind(&ExportTaskMgr::finalize_task, this, - std::placeholders::_1, std::placeholders::_2))); - - // redo this task if failed before - if (_failed_tasks.exists(id)) { - _failed_tasks.erase(id); - } - - VLOG_EXPORT << "accept one export Task. id=" << id; - _running_tasks.insert(id); - - return Status::OK(); -} - -Status ExportTaskMgr::cancel_task(const TUniqueId& id) { - std::lock_guard l(_lock); - auto it = _running_tasks.find(id); - if (it == _running_tasks.end()) { - // Nothing to do - LOG(INFO) << "No such export task id, just print to info " << id; - return Status::OK(); - } - _running_tasks.erase(it); - VLOG_EXPORT << "task id(" << id << ") have been removed from ExportTaskMgr."; - ExportTaskCtx ctx; - ctx.status = Status::Cancelled("Cancelled"); - _failed_tasks.put(id, ctx); - return Status::OK(); -} - -Status ExportTaskMgr::erase_task(const TUniqueId& id) { - std::lock_guard l(_lock); - auto it = _running_tasks.find(id); - if (it != _running_tasks.end()) { - return Status::InternalError("Task({}) is running, can not be deleted.", id); - } - _success_tasks.erase(id); - _failed_tasks.erase(id); - - return Status::OK(); -} - -void ExportTaskMgr::finalize_task(RuntimeState* state, Status* status) { - ExportTaskResult result; - - if (status->ok()) { - result.files = state->export_output_files(); - } - - finish_task(state->fragment_instance_id(), *status, result); - - // Try to report this finished task to master - report_to_master(state); -} - -Status ExportTaskMgr::finish_task(const TUniqueId& id, const Status& status, - const ExportTaskResult& result) { - std::lock_guard l(_lock); - auto it = _running_tasks.find(id); - if (it == _running_tasks.end()) { - return Status::InternalError("Unknown task id({}).", id); - } - _running_tasks.erase(it); - - ExportTaskCtx ctx; - ctx.status = status; - ctx.result = result; - if (status.ok()) { - _success_tasks.put(id, ctx); - } else { - _failed_tasks.put(id, ctx); - } - - VLOG_EXPORT << "Move task(" << id << ") from running to " - << (status.ok() ? "success tasks" : "failed tasks"); - - return Status::OK(); -} - -Status ExportTaskMgr::get_task_state(const TUniqueId& id, TExportStatusResult* result) { - std::lock_guard l(_lock); - auto it = _running_tasks.find(id); - if (it != _running_tasks.end()) { - result->status.__set_status_code(TStatusCode::OK); - result->__set_state(TExportState::RUNNING); - return Status::OK(); - } - - // Successful - if (_success_tasks.exists(id)) { - ExportTaskCtx ctx; - _success_tasks.get(id, &ctx); - result->status.__set_status_code(TStatusCode::OK); - result->__set_state(TExportState::FINISHED); - result->__set_files(ctx.result.files); - return Status::OK(); - } - - // failed information - if (_failed_tasks.exists(id)) { - ExportTaskCtx ctx; - _success_tasks.get(id, &ctx); - result->status.__set_status_code(TStatusCode::OK); - result->__set_state(TExportState::CANCELLED); - return Status::OK(); - } - - // NO this task - result->status.__set_status_code(TStatusCode::OK); - result->__set_state(TExportState::CANCELLED); - return Status::OK(); -} - -void ExportTaskMgr::report_to_master(RuntimeState* state) { - TUpdateExportTaskStatusRequest request; - request.protocolVersion = FrontendServiceVersion::V1; - request.taskId = state->fragment_instance_id(); - Status status = get_task_state(state->fragment_instance_id(), &request.taskStatus); - if (!status.ok()) { - return; - } - const TNetworkAddress& master_address = _exec_env->master_info()->network_address; - FrontendServiceConnection client(_exec_env->frontend_client_cache(), master_address, - config::thrift_rpc_timeout_ms, &status); - if (!status.ok()) { - std::stringstream ss; - ss << "Connect master failed, with address(" << master_address.hostname << ":" - << master_address.port << ")"; - LOG(WARNING) << ss.str(); - return; - } - - VLOG_ROW << "export updateExportTaskStatus. request is " - << apache::thrift::ThriftDebugString(request).c_str(); - - TFeResult res; - try { - try { - client->updateExportTaskStatus(res, request); - } catch (apache::thrift::transport::TTransportException& e) { - LOG(WARNING) << "Retrying report export tasks status to master(" - << master_address.hostname << ":" << master_address.port - << ") because: " << e.what(); - status = client.reopen(config::thrift_rpc_timeout_ms); - if (!status.ok()) { - LOG(WARNING) << "Client repoen failed. with address(" << master_address.hostname - << ":" << master_address.port << ")"; - return; - } - client->updateExportTaskStatus(res, request); - } - } catch (apache::thrift::TException& e) { - // failed when retry. - // reopen to disable this connection - client.reopen(config::thrift_rpc_timeout_ms); - std::stringstream ss; - ss << "Fail to report export task to master(" << master_address.hostname << ":" - << master_address.port << "). reason: " << e.what(); - LOG(WARNING) << ss.str(); - } - - LOG(INFO) << "Successfully report elt task status to master." - << " id=" << print_id(request.taskId); -} - -} // end namespace doris diff --git a/be/src/runtime/export_task_mgr.h b/be/src/runtime/export_task_mgr.h deleted file mode 100644 index 90dd2017cb..0000000000 --- a/be/src/runtime/export_task_mgr.h +++ /dev/null @@ -1,81 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include -#include -#include - -#include "common/status.h" -#include "runtime/runtime_state.h" -#include "util/lru_cache.hpp" - -namespace doris { - -class ExecEnv; -class PlanFragmentExecutor; -class TExportStatusResult; -class TExportTaskRequest; - -// used to report to master -struct ExportTaskResult { - // files exported to - std::vector files; -}; - -// used to report to master -struct ExportTaskCtx { - Status status; - ExportTaskResult result; -}; - -// we need to communicate with FE on the status of export tasks, so we need this class to manage. -class ExportTaskMgr { -public: - ExportTaskMgr(ExecEnv* exec_env); - - virtual ~ExportTaskMgr() = default; - - Status init(); - - Status start_task(const TExportTaskRequest& request); - - Status cancel_task(const TUniqueId& id); - - Status erase_task(const TUniqueId& id); - - Status finish_task(const TUniqueId& id, const Status& status, const ExportTaskResult& result); - - Status get_task_state(const TUniqueId& id, TExportStatusResult* status_result); - - void finalize_task(RuntimeState* state, Status* status); - -private: - void report_to_master(RuntimeState* state); - - ExecEnv* _exec_env; - - std::mutex _lock; - std::unordered_set _running_tasks; - LruCache _success_tasks; - LruCache _failed_tasks; -}; - -} // end namespace doris diff --git a/be/src/service/CMakeLists.txt b/be/src/service/CMakeLists.txt index 9446fdf700..f0831f7317 100644 --- a/be/src/service/CMakeLists.txt +++ b/be/src/service/CMakeLists.txt @@ -21,14 +21,10 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/service") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/service") -add_library(Service - backend_options.cpp - backend_service.cpp - brpc_service.cpp - http_service.cpp - point_query_executor.cpp - internal_service.cpp -) +file(GLOB_RECURSE SRC_FILES CONFIGURE_DEPENDS *.cpp) +list(REMOVE_ITEM SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/doris_main.cpp) + +add_library(Service STATIC ${SRC_FILES}) pch_reuse(Service) diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index f2804018be..b59c420baf 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -21,95 +21,16 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/util") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/util") -add_library(cityhash cityhash102/city.cc) - -set(UTIL_FILES - arrow/row_batch.cpp - arrow/utils.cpp - arrow/block_convertor.cpp - bfd_parser.cpp - bitmap.cpp - block_compression.cpp - coding.cpp - cpu_info.cpp - crc32c.cpp - date_func.cpp - dynamic_util.cpp - debug_util.cpp - disk_info.cpp - errno.cpp - hash_util.hpp - histogram.cpp - doris_metrics.cpp - mem_info.cpp - metrics.cpp - murmur_hash3.cpp - network_util.cpp - parse_util.cpp - path_builder.cpp -# TODO: not supported on RHEL 5 - perf_counters.cpp - runtime_profile.cpp - static_asserts.cpp - thrift_util.cpp - thrift_client.cpp - thrift_server.cpp - stack_util.cpp - system_metrics.cpp - url_parser.cpp - url_coding.cpp - mysql_row_buffer.cpp - error_util.cc - time.cpp - os_info.cpp - os_util.cpp - # coding_util.cpp - cidr.cpp - core_local.cpp - uid_util.cpp - encryption_util.cpp - string_util.cpp - md5.cpp - sm3.cpp - thrift_rpc_helper.cpp - faststring.cc - slice.cpp - frame_of_reference_coding.cpp - utf8_check.cpp - cgroup_util.cpp - path_util.cpp - thread.cpp - threadpool.cpp - trace.cpp - trace_metrics.cpp - timezone_utils.cpp - easy_json.cc - mustache/mustache.cc - brpc_client_cache.cpp - zlib.cpp - pprof_utils.cpp - s3_uri.cpp - s3_util.cpp - hdfs_util.cpp - time_lut.cpp - telemetry/telemetry.cpp - telemetry/brpc_carrier.cpp - telemetry/open_telemetry_scop_wrapper.hpp - quantile_state.cpp - jni-util.cpp - libjvm_loader.cpp - jni_native_method.cpp -) - +file(GLOB_RECURSE UTIL_FILES CONFIGURE_DEPENDS *.cpp *.cc) if (OS_MACOSX) - list(REMOVE_ITEM UTIL_FILES perf_counters.cpp disk_info.cpp) - list(APPEND UTIL_FILES perf_counters_mac.cpp disk_info_mac.cpp) + list(REMOVE_ITEM UTIL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/perf_counters.cpp ${CMAKE_CURRENT_SOURCE_DIR}/disk_info.cpp) +else () + list(REMOVE_ITEM UTIL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/perf_counters_mac.cpp ${CMAKE_CURRENT_SOURCE_DIR}/disk_info_mac.cpp) endif() add_library(Util STATIC ${UTIL_FILES} ) -target_link_libraries(Util cityhash) pch_reuse(Util) \ No newline at end of file diff --git a/be/src/util/cityhash102/city.cc b/be/src/util/cityhash102/city.cc deleted file mode 100644 index 8c3d449d14..0000000000 --- a/be/src/util/cityhash102/city.cc +++ /dev/null @@ -1,468 +0,0 @@ -// Copyright (c) 2011 Google, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -// -// CityHash, by Geoff Pike and Jyrki Alakuijala -// -// This file provides CityHash64() and related functions. -// -// It's probably possible to create even faster hash functions by -// writing a program that systematically explores some of the space of -// possible hash functions, by using SIMD instructions, or by -// compromising on hash quality. - -#include "city.h" - -// IWYU pragma: no_include -#include // for memcpy and memset -#include - -#include "config.h" - -#if !defined(WORDS_BIGENDIAN) - -#define uint32_in_expected_order(x) (x) -#define uint64_in_expected_order(x) (x) - -#else - -#ifdef _MSC_VER -#include - -#define bswap_32(x) _byteswap_ulong(x) -#define bswap_64(x) _byteswap_uint64(x) - -#elif defined(__APPLE__) -// Mac OS X / Darwin features -#include - -#define bswap_32(x) OSSwapInt32(x) -#define bswap_64(x) OSSwapInt64(x) - -#else -#include -#endif - -#define uint32_in_expected_order(x) (bswap_32(x)) -#define uint64_in_expected_order(x) (bswap_64(x)) - -#endif // WORDS_BIGENDIAN - -#if !defined(LIKELY) -#if HAVE_BUILTIN_EXPECT -#define LIKELY(x) (__builtin_expect(!!(x), 1)) -#else -#define LIKELY(x) (x) -#endif -#endif - -namespace CityHash_v1_0_2 { - -static uint64 UNALIGNED_LOAD64(const char* p) { - uint64 result; - memcpy(&result, p, sizeof(result)); - return result; -} - -static uint32 UNALIGNED_LOAD32(const char* p) { - uint32 result; - memcpy(&result, p, sizeof(result)); - return result; -} - -static uint64 Fetch64(const char* p) { - return uint64_in_expected_order(UNALIGNED_LOAD64(p)); -} - -static uint32 Fetch32(const char* p) { - return uint32_in_expected_order(UNALIGNED_LOAD32(p)); -} - -// Some primes between 2^63 and 2^64 for various uses. -static const uint64 k0 = 0xc3a5c85c97cb3127ULL; -static const uint64 k1 = 0xb492b66fbe98f273ULL; -static const uint64 k2 = 0x9ae16a3b2f90404fULL; -static const uint64 k3 = 0xc949d7c7509e6557ULL; - -// Bitwise right rotate. Normally this will compile to a single -// instruction, especially if the shift is a manifest constant. -static uint64 Rotate(uint64 val, int shift) { - // Avoid shifting by 64: doing so yields an undefined result. - return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); -} - -// Equivalent to Rotate(), but requires the second arg to be non-zero. -// On x86-64, and probably others, it's possible for this to compile -// to a single instruction if both args are already in registers. -static uint64 RotateByAtLeast1(uint64 val, int shift) { - return (val >> shift) | (val << (64 - shift)); -} - -static uint64 ShiftMix(uint64 val) { - return val ^ (val >> 47); -} - -static uint64 HashLen16(uint64 u, uint64 v) { - return Hash128to64(uint128(u, v)); -} - -static uint64 HashLen0to16(const char* s, size_t len) { - if (len > 8) { - uint64 a = Fetch64(s); - uint64 b = Fetch64(s + len - 8); - return HashLen16(a, RotateByAtLeast1(b + len, len)) ^ b; - } - if (len >= 4) { - uint64 a = Fetch32(s); - return HashLen16(len + (a << 3), Fetch32(s + len - 4)); - } - if (len > 0) { - uint8 a = s[0]; - uint8 b = s[len >> 1]; - uint8 c = s[len - 1]; - uint32 y = static_cast(a) + (static_cast(b) << 8); - uint32 z = len + (static_cast(c) << 2); - return ShiftMix(y * k2 ^ z * k3) * k2; - } - return k2; -} - -// This probably works well for 16-byte strings as well, but it may be overkill -// in that case. -static uint64 HashLen17to32(const char* s, size_t len) { - uint64 a = Fetch64(s) * k1; - uint64 b = Fetch64(s + 8); - uint64 c = Fetch64(s + len - 8) * k2; - uint64 d = Fetch64(s + len - 16) * k0; - return HashLen16(Rotate(a - b, 43) + Rotate(c, 30) + d, a + Rotate(b ^ k3, 20) - c + len); -} - -// Return a 16-byte hash for 48 bytes. Quick and dirty. -// Callers do best to use "random-looking" values for a and b. -static std::pair WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, - uint64 a, uint64 b) { - a += w; - b = Rotate(b + a + z, 21); - uint64 c = a; - a += x; - a += y; - b += Rotate(a, 44); - return std::make_pair(a + z, b + c); -} - -// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. -static std::pair WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) { - return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a, - b); -} - -// Return an 8-byte hash for 33 to 64 bytes. -static uint64 HashLen33to64(const char* s, size_t len) { - uint64 z = Fetch64(s + 24); - uint64 a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0; - uint64 b = Rotate(a + z, 52); - uint64 c = Rotate(a, 37); - a += Fetch64(s + 8); - c += Rotate(a, 7); - a += Fetch64(s + 16); - uint64 vf = a + z; - uint64 vs = b + Rotate(a, 31) + c; - a = Fetch64(s + 16) + Fetch64(s + len - 32); - z = Fetch64(s + len - 8); - b = Rotate(a + z, 52); - c = Rotate(a, 37); - a += Fetch64(s + len - 24); - c += Rotate(a, 7); - a += Fetch64(s + len - 16); - uint64 wf = a + z; - uint64 ws = b + Rotate(a, 31) + c; - uint64 r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0); - return ShiftMix(r * k0 + vs) * k2; -} - -uint64 CityHash64(const char* s, size_t len) { - if (len <= 32) { - if (len <= 16) { - return HashLen0to16(s, len); - } else { - return HashLen17to32(s, len); - } - } else if (len <= 64) { - return HashLen33to64(s, len); - } - - // For strings over 64 bytes we hash the end first, and then as we - // loop we keep 56 bytes of state: v, w, x, y, and z. - uint64 x = Fetch64(s); - uint64 y = Fetch64(s + len - 16) ^ k1; - uint64 z = Fetch64(s + len - 56) ^ k0; - std::pair v = WeakHashLen32WithSeeds(s + len - 64, len, y); - std::pair w = WeakHashLen32WithSeeds(s + len - 32, len * k1, k0); - z += ShiftMix(v.second) * k1; - x = Rotate(z + x, 39) * k1; - y = Rotate(y, 33) * k1; - - // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. - len = (len - 1) & ~static_cast(63); - do { - x = Rotate(x + y + v.first + Fetch64(s + 16), 37) * k1; - y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y ^= v.first; - z = Rotate(z ^ w.first, 33); - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y); - std::swap(z, x); - s += 64; - len -= 64; - } while (len != 0); - return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, - HashLen16(v.second, w.second) + x); -} - -uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) { - return CityHash64WithSeeds(s, len, k2, seed); -} - -uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) { - return HashLen16(CityHash64(s, len) - seed0, seed1); -} - -// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings -// of any length representable in ssize_t. Based on City and Murmur. -static uint128 CityMurmur(const char* s, size_t len, uint128 seed) { - uint64 a = Uint128Low64(seed); - uint64 b = Uint128High64(seed); - uint64 c = 0; - uint64 d = 0; - ssize_t l = len - 16; - if (l <= 0) { // len <= 16 - a = ShiftMix(a * k1) * k1; - c = b * k1 + HashLen0to16(s, len); - d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c)); - } else { // len > 16 - c = HashLen16(Fetch64(s + len - 8) + k1, a); - d = HashLen16(b + len, c + Fetch64(s + len - 16)); - a += d; - do { - a ^= ShiftMix(Fetch64(s) * k1) * k1; - a *= k1; - b ^= a; - c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; - c *= k1; - d ^= c; - s += 16; - l -= 16; - } while (l > 0); - } - a = HashLen16(a, c); - b = HashLen16(d, b); - return uint128(a ^ b, HashLen16(b, a)); -} - -uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) { - if (len < 128) { - return CityMurmur(s, len, seed); - } - - // We expect len >= 128 to be the common case. Keep 56 bytes of state: - // v, w, x, y, and z. - std::pair v, w; - uint64 x = Uint128Low64(seed); - uint64 y = Uint128High64(seed); - uint64 z = len * k1; - v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); - v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); - w.first = Rotate(y + z, 35) * k1 + x; - w.second = Rotate(x + Fetch64(s + 88), 53) * k1; - - // This is the same inner loop as CityHash64(), manually unrolled. - do { - x = Rotate(x + y + v.first + Fetch64(s + 16), 37) * k1; - y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y ^= v.first; - z = Rotate(z ^ w.first, 33); - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y); - std::swap(z, x); - s += 64; - x = Rotate(x + y + v.first + Fetch64(s + 16), 37) * k1; - y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y ^= v.first; - z = Rotate(z ^ w.first, 33); - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y); - std::swap(z, x); - s += 64; - len -= 128; - } while (LIKELY(len >= 128)); - y += Rotate(w.first, 37) * k0 + z; - x += Rotate(v.first + z, 49) * k0; - // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. - for (size_t tail_done = 0; tail_done < len;) { - tail_done += 32; - y = Rotate(y - x, 42) * k0 + v.second; - w.first += Fetch64(s + len - tail_done + 16); - x = Rotate(x, 49) * k0 + w.first; - w.first += v.first; - v = WeakHashLen32WithSeeds(s + len - tail_done, v.first, v.second); - } - // At this point our 48 bytes of state should contain more than - // enough information for a strong 128-bit hash. We use two - // different 48-byte-to-8-byte hashes to get a 16-byte final result. - x = HashLen16(x, v.first); - y = HashLen16(y, w.first); - return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)); -} - -uint128 CityHash128(const char* s, size_t len) { - if (len >= 16) { - return CityHash128WithSeed(s + 16, len - 16, uint128(Fetch64(s) ^ k3, Fetch64(s + 8))); - } else if (len >= 8) { - return CityHash128WithSeed(nullptr, 0, - uint128(Fetch64(s) ^ (len * k0), Fetch64(s + len - 8) ^ k1)); - } else { - return CityHash128WithSeed(s, len, uint128(k0, k1)); - } -} - -} // namespace CityHash_v1_0_2 - -#if defined(__SSE4_2__) || defined(__aarch64__) -#include "citycrc.h" -#include "util/sse_util.hpp" // IWYU pragma: keep - -namespace CityHash_v1_0_2 { - -// Requires len >= 240. -static void CityHashCrc256Long(const char* s, size_t len, uint32 seed, uint64* result) { - uint64 a = Fetch64(s + 56) + k0; - uint64 b = Fetch64(s + 96) + k0; - uint64 c = result[1] = HashLen16(b, len); - uint64 d = result[2] = Fetch64(s + 120) * k0 + len; - uint64 e = Fetch64(s + 184) + seed; - uint64 f = seed; - uint64 g = 0; - uint64 h = 0; - uint64 i = 0; - uint64 j = 0; - uint64 t = c + d; - - // 240 bytes of input per iter. - size_t iters = len / 240; - len -= iters * 240; - do { -#define CHUNK(multiplier, z) \ - { \ - uint64 old_a = a; \ - a = Rotate(b, 41 ^ z) * multiplier + Fetch64(s); \ - b = Rotate(c, 27 ^ z) * multiplier + Fetch64(s + 8); \ - c = Rotate(d, 41 ^ z) * multiplier + Fetch64(s + 16); \ - d = Rotate(e, 33 ^ z) * multiplier + Fetch64(s + 24); \ - e = Rotate(t, 25 ^ z) * multiplier + Fetch64(s + 32); \ - t = old_a; \ - } \ - f = _mm_crc32_u64(f, a); \ - g = _mm_crc32_u64(g, b); \ - h = _mm_crc32_u64(h, c); \ - i = _mm_crc32_u64(i, d); \ - j = _mm_crc32_u64(j, e); \ - s += 40 - - CHUNK(1, 1); - CHUNK(k0, 0); - CHUNK(1, 1); - CHUNK(k0, 0); - CHUNK(1, 1); - CHUNK(k0, 0); - } while (--iters > 0); - j += i << 32; - a = HashLen16(a, j); - h += g << 32; - b = b * k0 + h; - c = HashLen16(c, f) + i; - d = HashLen16(d, e); - std::pair v(j + e, HashLen16(h, t)); - h = v.second + f; - // If 0 < len < 240, hash chunks of 32 bytes each from the end of s. - for (size_t tail_done = 0; tail_done < len;) { - tail_done += 32; - c = Rotate(c - a, 42) * k0 + v.second; - d += Fetch64(s + len - tail_done + 16); - a = Rotate(a, 49) * k0 + d; - d += v.first; - v = WeakHashLen32WithSeeds(s + len - tail_done, v.first, v.second); - } - - // Final mix. - e = HashLen16(a, d) + v.first; - f = HashLen16(b, c) + a; - g = HashLen16(v.first, v.second) + c; - result[0] = e + f + g + h; - a = ShiftMix((a + g) * k0) * k0 + b; - result[1] += a + result[0]; - a = ShiftMix(a * k0) * k0 + c; - result[2] += a + result[1]; - a = ShiftMix((a + e) * k0) * k0; - result[3] = a + result[2]; -} - -// Requires len < 240. -static void CityHashCrc256Short(const char* s, size_t len, uint64* result) { - char buf[240]; - memcpy(buf, s, len); - memset(buf + len, 0, 240 - len); - CityHashCrc256Long(buf, 240, ~static_cast(len), result); -} - -void CityHashCrc256(const char* s, size_t len, uint64* result) { - if (LIKELY(len >= 240)) { - CityHashCrc256Long(s, len, 0, result); - } else { - CityHashCrc256Short(s, len, result); - } -} - -uint128 CityHashCrc128WithSeed(const char* s, size_t len, uint128 seed) { - if (len <= 900) { - return CityHash128WithSeed(s, len, seed); - } else { - uint64 result[4]; - CityHashCrc256(s, len, result); - uint64 u = Uint128High64(seed) + result[0]; - uint64 v = Uint128Low64(seed) + result[1]; - return uint128(HashLen16(u, v + result[2]), HashLen16(Rotate(v, 32), u * k0 + result[3])); - } -} - -uint128 CityHashCrc128(const char* s, size_t len) { - if (len <= 900) { - return CityHash128(s, len); - } else { - uint64 result[4]; - CityHashCrc256(s, len, result); - return uint128(result[2], result[3]); - } -} - -} // namespace CityHash_v1_0_2 - -#endif diff --git a/be/src/util/cityhash102/city.h b/be/src/util/cityhash102/city.h deleted file mode 100644 index 77d4c988cd..0000000000 --- a/be/src/util/cityhash102/city.h +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2011 Google, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -// -// CityHash, by Geoff Pike and Jyrki Alakuijala -// -// This file provides a few functions for hashing strings. On x86-64 -// hardware in 2011, CityHash64() is faster than other high-quality -// hash functions, such as Murmur. This is largely due to higher -// instruction-level parallelism. CityHash64() and CityHash128() also perform -// well on hash-quality tests. -// -// CityHash128() is optimized for relatively long strings and returns -// a 128-bit hash. For strings more than about 2000 bytes it can be -// faster than CityHash64(). -// -// Functions in the CityHash family are not suitable for cryptography. -// -// WARNING: This code has not been tested on big-endian platforms! -// It is known to work well on little-endian platforms that have a small penalty -// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. -// -// By the way, for some hash functions, given strings a and b, the hash -// of a+b is easily derived from the hashes of a and b. This property -// doesn't hold for any hash functions in this file. - -#ifndef CITY_HASH_H_ -#define CITY_HASH_H_ - -#include // for size_t. -#include -#include - -/** This is a version of CityHash that predates v1.0.3 algorithm change. - * Why we need exactly this version? - * Although hash values of CityHash are not recommended for storing persistently anywhere, - * it has already been used this way in ClickHouse: - * - for calculation of checksums of compressed chunks and for data parts; - * - this version of CityHash is exposed in cityHash64 function in ClickHouse SQL language; - * - and already used by many users for data ordering, sampling and sharding. - */ -namespace CityHash_v1_0_2 -{ - -typedef uint8_t uint8; -typedef uint32_t uint32; -typedef uint64_t uint64; -typedef std::pair uint128; - - -inline uint64 Uint128Low64(const uint128& x) { return x.first; } -inline uint64 Uint128High64(const uint128& x) { return x.second; } - -// Hash function for a byte array. -uint64 CityHash64(const char *buf, size_t len); - -// Hash function for a byte array. For convenience, a 64-bit seed is also -// hashed into the result. -uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed); - -// Hash function for a byte array. For convenience, two seeds are also -// hashed into the result. -uint64 CityHash64WithSeeds(const char *buf, size_t len, - uint64 seed0, uint64 seed1); - -// Hash function for a byte array. -uint128 CityHash128(const char *s, size_t len); - -// Hash function for a byte array. For convenience, a 128-bit seed is also -// hashed into the result. -uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed); - -// Hash 128 input bits down to 64 bits of output. -// This is intended to be a reasonably good hash function. -inline uint64 Hash128to64(const uint128& x) { - // Murmur-inspired hashing. - const uint64 kMul = 0x9ddfea08eb382d69ULL; - uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; - a ^= (a >> 47); - uint64 b = (Uint128High64(x) ^ a) * kMul; - b ^= (b >> 47); - b *= kMul; - return b; -} - -} - -#endif // CITY_HASH_H_ diff --git a/be/src/util/cityhash102/citycrc.h b/be/src/util/cityhash102/citycrc.h deleted file mode 100644 index 3ec72cc887..0000000000 --- a/be/src/util/cityhash102/citycrc.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2011 Google, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -// -// CityHash, by Geoff Pike and Jyrki Alakuijala -// -// This file declares the subset of the CityHash functions that require -// _mm_crc32_u64(). See the CityHash README for details. -// -// Functions in the CityHash family are not suitable for cryptography. - -#ifndef CITY_HASH_CRC_H_ -#define CITY_HASH_CRC_H_ - -#include "city.h" - -namespace CityHash_v1_0_2 -{ - -// Hash function for a byte array. -uint128 CityHashCrc128(const char *s, size_t len); - -// Hash function for a byte array. For convenience, a 128-bit seed is also -// hashed into the result. -uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed); - -// Hash function for a byte array. Sets result[0] ... result[3]. -void CityHashCrc256(const char *s, size_t len, uint64 *result); - -} - -#endif // CITY_HASH_CRC_H_ diff --git a/be/src/util/cityhash102/config.h b/be/src/util/cityhash102/config.h deleted file mode 100644 index cca744a35c..0000000000 --- a/be/src/util/cityhash102/config.h +++ /dev/null @@ -1,125 +0,0 @@ -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.ac by autoheader. */ - -/* Define if building universal (internal helper macro) */ -/* #undef AC_APPLE_UNIVERSAL_BUILD */ - -/* Define to 1 if the compiler supports __builtin_expect. */ -#if _MSC_VER -#define HAVE_BUILTIN_EXPECT 0 -#else -#define HAVE_BUILTIN_EXPECT 1 -#endif - -/* Define to 1 if you have the header file. */ -#define HAVE_DLFCN_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "cityhash-discuss@googlegroups.com" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "CityHash" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "CityHash 1.0.2" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "cityhash" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "1.0.2" - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most - significant byte first (like Motorola and SPARC, unlike Intel). */ -#if defined AC_APPLE_UNIVERSAL_BUILD -# if defined __BIG_ENDIAN__ -# define WORDS_BIGENDIAN 1 -# endif -#else -# ifndef WORDS_BIGENDIAN -/* # undef WORDS_BIGENDIAN */ -# endif -#endif - -/* Define for Solaris 2.5.1 so the uint32_t typedef from , - , or is not used. If the typedef were allowed, the - #define below would cause a syntax error. */ -/* #undef _UINT32_T */ - -/* Define for Solaris 2.5.1 so the uint64_t typedef from , - , or is not used. If the typedef were allowed, the - #define below would cause a syntax error. */ -/* #undef _UINT64_T */ - -/* Define for Solaris 2.5.1 so the uint8_t typedef from , - , or is not used. If the typedef were allowed, the - #define below would cause a syntax error. */ -/* #undef _UINT8_T */ - -/* Define to `__inline__' or `__inline' if that's what the C compiler - calls it, or to nothing if 'inline' is not supported under any name. */ -#ifndef __cplusplus -/* #undef inline */ -#endif - -/* Define to `unsigned int' if does not define. */ -/* #undef size_t */ - -/* Define to `int' if does not define. */ -/* #undef ssize_t */ - -/* Define to the type of an unsigned integer type of width exactly 32 bits if - such a type exists and the standard includes do not define it. */ -/* #undef uint32_t */ - -/* Define to the type of an unsigned integer type of width exactly 64 bits if - such a type exists and the standard includes do not define it. */ -/* #undef uint64_t */ - -/* Define to the type of an unsigned integer type of width exactly 8 bits if - such a type exists and the standard includes do not define it. */ -/* #undef uint8_t */ - -#ifdef _MSC_VER - #include - typedef SSIZE_T ssize_t; -#else - #include -#endif diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 763d6a0ed7..b40e24f7ea 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -19,349 +19,10 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/vec") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/vec") -set(VEC_FILES - aggregate_functions/aggregate_function_sequence_match.cpp - aggregate_functions/aggregate_function_retention.cpp - aggregate_functions/aggregate_function_window_funnel.cpp - aggregate_functions/aggregate_function_avg.cpp - aggregate_functions/aggregate_function_collect.cpp - aggregate_functions/aggregate_function_count.cpp - aggregate_functions/aggregate_function_distinct.cpp - aggregate_functions/aggregate_function_sum.cpp - aggregate_functions/aggregate_function_sort.cpp - aggregate_functions/aggregate_function_min_max.cpp - aggregate_functions/aggregate_function_min_by.cpp - aggregate_functions/aggregate_function_max_by.cpp - aggregate_functions/aggregate_function_uniq.cpp - aggregate_functions/aggregate_function_hll_union_agg.cpp - aggregate_functions/aggregate_function_bit.cpp - aggregate_functions/aggregate_function_bitmap.cpp - aggregate_functions/aggregate_function_reader.cpp - aggregate_functions/aggregate_function_window.cpp - aggregate_functions/aggregate_function_stddev.cpp - aggregate_functions/aggregate_function_topn.cpp - aggregate_functions/aggregate_function_approx_count_distinct.cpp - aggregate_functions/aggregate_function_group_concat.cpp - aggregate_functions/aggregate_function_percentile_approx.cpp - aggregate_functions/aggregate_function_simple_factory.cpp - aggregate_functions/aggregate_function_orthogonal_bitmap.cpp - aggregate_functions/aggregate_function_avg_weighted.cpp - aggregate_functions/aggregate_function_histogram.cpp - aggregate_functions/aggregate_function_quantile_state.cpp - columns/column.cpp - columns/column_array.cpp - columns/column_struct.cpp - columns/column_const.cpp - columns/column_decimal.cpp - columns/column_nullable.cpp - columns/column_string.cpp - columns/column_vector.cpp - columns/column_map.cpp - columns/columns_common.cpp - columns/column_object.cpp - json/json_parser.cpp - json/parse2column.cpp - json/path_in_data.cpp - common/schema_util.cpp - common/demangle.cpp - common/mremap.cpp - common/pod_array.cpp - common/string_ref.cpp - common/sort/heap_sorter.cpp - common/sort/sorter.cpp - common/sort/topn_sorter.cpp - common/sort/vsort_exec_exprs.cpp - common/sort/partition_sorter.cpp - common/string_utils/string_utils.cpp - common/hex.cpp - common/allocator.cpp - core/block.cpp - core/block_spill_reader.cpp - core/block_spill_writer.cpp - core/column_with_type_and_name.cpp - core/field.cpp - core/field.cpp - core/sort_block.cpp - core/materialize_block.cpp - data_types/serde/data_type_serde.cpp - data_types/serde/data_type_map_serde.cpp - data_types/serde/data_type_array_serde.cpp - data_types/serde/data_type_struct_serde.cpp - data_types/serde/data_type_number_serde.cpp - data_types/serde/data_type_datev2_serde.cpp - data_types/serde/data_type_datetimev2_serde.cpp - data_types/serde/data_type_date64_serde.cpp - data_types/serde/data_type_string_serde.cpp - data_types/serde/data_type_decimal_serde.cpp - data_types/serde/data_type_object_serde.cpp - data_types/serde/data_type_fixedlengthobject_serde.cpp - data_types/serde/data_type_hll_serde.cpp - data_types/serde/data_type_bitmap_serde.cpp - data_types/serde/data_type_quantilestate_serde.cpp - data_types/serde/data_type_nullable_serde.cpp - data_types/serde/data_type_time_serde.cpp - data_types/serde/data_type_jsonb_serde.cpp - data_types/data_type.cpp - data_types/data_type_array.cpp - data_types/data_type_struct.cpp - data_types/data_type_bitmap.cpp - data_types/data_type_factory.cpp - data_types/data_type_fixed_length_object.cpp - data_types/data_type_hll.cpp - data_types/data_type_nothing.cpp - data_types/data_type_nothing.cpp - data_types/data_type_nullable.cpp - data_types/data_type_nullable.cpp - data_types/data_type_number_base.cpp - data_types/data_type_string.cpp - data_types/data_type_decimal.cpp - data_types/data_type_map.cpp - data_types/data_type_quantilestate.cpp - data_types/get_least_supertype.cpp - data_types/convert_field_to_type.cpp - data_types/nested_utils.cpp - data_types/data_type_date.cpp - data_types/data_type_date_time.cpp - data_types/data_type_time_v2.cpp - data_types/data_type_jsonb.cpp - data_types/data_type_time.cpp - data_types/data_type_object.cpp - exec/vaggregation_node.cpp - exec/vsort_node.cpp - exec/vexchange_node.cpp - exec/vset_operation_node.cpp - exec/vunion_node.cpp - exec/vselect_node.cpp - exec/vschema_scan_node.cpp - exec/vempty_set_node.cpp - exec/vanalytic_eval_node.cpp - exec/vassert_num_rows_node.cpp - exec/vrepeat_node.cpp - exec/vtable_function_node.cpp - exec/vjdbc_connector.cpp - exec/vpartition_sort_node.cpp - exec/join/vhash_join_node.cpp - exec/join/vjoin_node_base.cpp - exec/join/vnested_loop_join_node.cpp - exec/join/inner_join_impl.cpp - exec/join/left_semi_join_impl.cpp - exec/join/left_anti_join_impl.cpp - exec/join/left_outer_join_impl.cpp - exec/join/full_outer_join_impl.cpp - exec/join/right_outer_join_impl.cpp - exec/join/cross_join_impl.cpp - exec/join/right_semi_join_impl.cpp - exec/join/right_anti_join_impl.cpp - exec/join/null_aware_left_anti_join_impl.cpp - exec/data_gen_functions/vnumbers_tvf.cpp - exec/vdata_gen_scan_node.cpp - exprs/vectorized_agg_fn.cpp - exprs/vectorized_fn_call.cpp - exprs/vexpr.cpp - exprs/vexpr_context.cpp - exprs/vliteral.cpp - exprs/varray_literal.cpp - exprs/vmap_literal.cpp - exprs/vmatch_predicate.cpp - exprs/vstruct_literal.cpp - exprs/vin_predicate.cpp - exprs/vbloom_predicate.cpp - exprs/vbitmap_predicate.cpp - exprs/vruntimefilter_wrapper.cpp - exprs/vtuple_is_null_predicate.cpp - exprs/vslot_ref.cpp - exprs/vcast_expr.cpp - exprs/vcase_expr.cpp - exprs/vinfo_func.cpp - exprs/vschema_change_expr.cpp - exprs/table_function/table_function_factory.cpp - exprs/table_function/vexplode.cpp - exprs/table_function/vexplode_split.cpp - exprs/table_function/vexplode_numbers.cpp - exprs/table_function/vexplode_bitmap.cpp - exprs/lambda_function/varray_map_function.cpp - exprs/lambda_function/varray_filter_function.cpp - functions/array/function_array_index.cpp - functions/array/function_array_element.cpp - functions/array/function_array_register.cpp - functions/array/function_array_size.cpp - functions/array/function_array_aggregation.cpp - functions/array/function_array_sort.cpp - functions/array/function_array_sortby.cpp - functions/array/function_array_utils.cpp - functions/array/function_array_shuffle.cpp - functions/array/function_arrays_overlap.cpp - functions/array/function_array_distinct.cpp - functions/array/function_array_remove.cpp - functions/array/function_array_union.cpp - functions/array/function_array_except.cpp - functions/array/function_array_exists.cpp - functions/array/function_array_intersect.cpp - functions/array/function_array_slice.cpp - functions/array/function_array_difference.cpp - functions/array/function_array_enumerate.cpp - functions/array/function_array_enumerate_uniq.cpp - functions/array/function_array_range.cpp - functions/array/function_array_compact.cpp - functions/array/function_array_popback.cpp - functions/array/function_array_popfront.cpp - functions/array/function_array_constructor.cpp - functions/array/function_array_with_constant.cpp - functions/array/function_array_apply.cpp - functions/array/function_array_concat.cpp - functions/array/function_array_zip.cpp - functions/array/function_array_pushfront.cpp - functions/array/function_array_first_or_last_index.cpp - functions/array/function_array_cum_sum.cpp - functions/array/function_array_count.cpp - functions/function_map.cpp - functions/function_struct.cpp - functions/function_struct_element.cpp - exprs/table_function/vexplode_json_array.cpp - functions/math.cpp - functions/function_bitmap.cpp - functions/function_bitmap_variadic.cpp - functions/comparison.cpp - functions/comparison_less.cpp - functions/comparison_equals.cpp - functions/comparison_greater.cpp - functions/function.cpp - functions/function_helpers.cpp - functions/function_hash.cpp - functions/functions_logical.cpp - functions/function_case.cpp - functions/function_cast.cpp - functions/function_conv.cpp - functions/function_encryption.cpp - functions/function_regexp.cpp - functions/function_hex.cpp - functions/function_string.cpp - functions/function_timestamp.cpp - functions/function_utility.cpp - functions/comparison_equal_for_null.cpp - functions/function_json.cpp - functions/function_jsonb.cpp - functions/function_datetime_floor_ceil.cpp - functions/functions_geo.cpp - functions/hll_cardinality.cpp - functions/hll_empty.cpp - functions/hll_hash.cpp - functions/plus.cpp - functions/modulo.cpp - functions/multiply.cpp - functions/minus.cpp - functions/int_div.cpp - functions/divide.cpp - functions/function_bit.cpp - functions/is_null.cpp - functions/is_not_null.cpp - functions/in.cpp - functions/like.cpp - functions/to_time_function.cpp - functions/time_of_function.cpp - functions/if.cpp - functions/function_ifnull.cpp - functions/nullif.cpp - functions/random.cpp - functions/uuid.cpp - functions/function_coalesce.cpp - functions/function_date_or_datetime_computation.cpp - functions/function_date_or_datetime_computation_v2.cpp - functions/function_date_or_datetime_to_string.cpp - functions/function_datetime_string_to_string.cpp - functions/function_grouping.cpp - functions/function_java_udf.cpp - functions/function_rpc.cpp - functions/function_convert_tz.cpp - functions/function_nonnullable.cpp - functions/least_greast.cpp - functions/function_fake.cpp - functions/url/function_url.cpp - functions/functions_multi_string_position.cpp - functions/functions_multi_string_search.cpp - functions/function_running_difference.cpp - functions/function_width_bucket.cpp - functions/match.cpp - functions/function_quantile_state.cpp - - jsonb/serialize.cpp - olap/vgeneric_iterators.cpp - olap/vcollect_iterator.cpp - olap/block_reader.cpp - olap/olap_data_convertor.cpp - olap/vertical_merge_iterator.cpp - olap/vertical_block_reader.cpp - sink/vmysql_result_writer.cpp - sink/vresult_sink.cpp - sink/vdata_stream_sender.cpp - sink/vtablet_sink.cpp - sink/vmemory_scratch_sink.cpp - sink/vmysql_table_writer.cpp - sink/vmysql_table_sink.cpp - sink/vodbc_table_sink.cpp - sink/vresult_file_sink.cpp - sink/vjdbc_table_sink.cpp - sink/vtable_sink.cpp - runtime/vdatetime_value.cpp - runtime/vdata_stream_recvr.cpp - runtime/vdata_stream_mgr.cpp - runtime/vfile_result_writer.cpp - runtime/vparquet_writer.cpp - runtime/vorc_writer.cpp - runtime/vsorted_run_merger.cpp - runtime/shared_hash_table_controller.cpp - utils/arrow_column_to_doris_column.cpp - exec/format/parquet/vparquet_column_chunk_reader.cpp - exec/format/parquet/vparquet_group_reader.cpp - exec/format/parquet/vparquet_page_index.cpp - exec/format/parquet/vparquet_reader.cpp - exec/format/parquet/vparquet_file_metadata.cpp - exec/format/parquet/vparquet_page_reader.cpp - exec/format/parquet/schema_desc.cpp - exec/format/parquet/vparquet_column_reader.cpp - exec/format/parquet/level_decoder.cpp - exec/format/parquet/decoder.cpp - exec/format/parquet/fix_length_plain_decoder.cpp - exec/format/parquet/byte_array_plain_decoder.cpp - exec/format/parquet/byte_array_dict_decoder.cpp - exec/format/parquet/bool_plain_decoder.cpp - exec/format/parquet/parquet_common.cpp - exec/scan/vscan_node.cpp - exec/scan/vscanner.cpp - exec/scan/scanner_context.cpp - exec/scan/scanner_scheduler.cpp - exec/scan/new_olap_scan_node.cpp - exec/scan/new_olap_scanner.cpp - exec/scan/new_file_scan_node.cpp - exec/scan/vfile_scanner.cpp - exec/scan/new_odbc_scanner.cpp - exec/scan/new_odbc_scan_node.cpp - exec/scan/new_jdbc_scanner.cpp - exec/scan/new_jdbc_scan_node.cpp - exec/scan/new_es_scanner.cpp - exec/scan/new_es_scan_node.cpp - exec/scan/vmeta_scan_node.cpp - exec/scan/vmeta_scanner.cpp - exec/format/csv/csv_reader.cpp - exec/format/orc/vorc_reader.cpp - exec/format/json/new_json_reader.cpp - exec/format/table/table_format_reader.cpp - exec/format/table/iceberg_reader.cpp - exec/format/file_reader/new_plain_text_line_reader.cpp - exec/format/file_reader/new_plain_binary_line_reader.cpp - exec/format/parquet/delta_bit_pack_decoder.cpp - exec/format/parquet/bool_rle_decoder.cpp - exec/jni_connector.cpp - exec/scan/jni_reader.cpp - exec/scan/paimon_reader.cpp - exec/scan/max_compute_jni_reader.cpp - ) +file(GLOB_RECURSE VEC_FILES CONFIGURE_DEPENDS *.cpp) -if (WITH_MYSQL) - set(VEC_FILES - ${VEC_FILES} - exec/vmysql_scan_node.cpp - exec/scan/mysql_scanner.cpp) +if (NOT WITH_MYSQL) + list(REMOVE_ITEM VEC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/exec/vmysql_scan_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/exec/scan/mysql_scanner.cpp) endif () add_library(Vec STATIC diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt index ad048d4656..2834afac6a 100644 --- a/be/test/CMakeLists.txt +++ b/be/test/CMakeLists.txt @@ -21,274 +21,47 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/test") # where to put generated libraries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/test") -set(AGENT_TEST_FILES - agent/utils_test.cpp - # agent/agent_server_test.cpp - # agent/heartbeat_server_test.cpp -) -set(COMMON_TEST_FILES - common/resource_tls_test.cpp - common/status_test.cpp - common/config_test.cpp - common/exception_test.cpp -) +file(GLOB_RECURSE UT_FILES CONFIGURE_DEPENDS *.cpp) -set(EXEC_TEST_FILES - vec/exec/parquet/parquet_thrift_test.cpp - vec/exec/parquet/parquet_reader_test.cpp -) - -if(DEFINED DORIS_WITH_LZO) - set(EXEC_TEST_FILES ${EXEC_FILES} exec/plain_text_line_reader_lzop_test.cpp) +if(NOT DEFINED DORIS_WITH_LZO) + list(REMOVE_ITEM UT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/exec/plain_text_line_reader_lzop_test.cpp) endif() -set(EXPRS_TEST_FILES - # exprs/binary_predicate_test.cpp - # exprs/in_predicate_test.cpp - # exprs/expr-test.cpp - # exprs/hybrid_set_test.cpp - # exprs/in-predicate-test.cpp - exprs/json_function_test.cpp - exprs/bloom_filter_predicate_test.cpp -) -set(GEO_TEST_FILES - geo/wkt_parse_test.cpp - geo/geo_types_test.cpp -) -set(GUTIL_TEST_FILES - gutil/strings/numbers_test.cpp -) -set(HTTP_TEST_FILES - http/message_body_sink_test.cpp - http/http_utils_test.cpp - http/http_client_test.cpp - http/http_auth_test.cpp - # TODO this will overide HttpChannel and make other test failed - # http/metrics_action_test.cpp -) -set(IO_TEST_FILES - io/cache/remote_file_cache_test.cpp - io/cache/file_block_cache_test.cpp - io/fs/local_file_system_test.cpp - io/fs/remote_file_system_test.cpp - io/fs/buffered_reader_test.cpp - io/fs/multi_table_pipe_test.cpp -) -set(OLAP_TEST_FILES - olap/engine_storage_migration_task_test.cpp - olap/timestamped_version_tracker_test.cpp - olap/tablet_schema_helper.cpp - olap/delta_writer_test.cpp - olap/delete_handler_test.cpp - olap/lru_cache_test.cpp - olap/bloom_filter_test.cpp - olap/itoken_extractor_test.cpp - olap/file_header_test.cpp - #olap/file_utils_test.cpp - olap/cumulative_compaction_policy_test.cpp - #olap/row_cursor_test.cpp - olap/skiplist_test.cpp - olap/memtable_sort_test.cpp - olap/olap_meta_test.cpp - olap/decimal12_test.cpp - olap/storage_types_test.cpp - #olap/rowset/segment_v2/bitshuffle_page_test.cpp - #olap/rowset/segment_v2/plain_page_test.cpp - olap/rowset/segment_v2/bitmap_index_test.cpp - #olap/rowset/segment_v2/binary_plain_page_test.cpp - #olap/rowset/segment_v2/binary_prefix_page_test.cpp - #olap/rowset/segment_v2/column_reader_writer_test.cpp - olap/rowset/segment_v2/encoding_info_test.cpp - olap/rowset/segment_v2/ordinal_page_index_test.cpp - #olap/rowset/segment_v2/rle_page_test.cpp - #olap/rowset/segment_v2/binary_dict_page_test.cpp - olap/rowset/segment_v2/row_ranges_test.cpp - #olap/rowset/segment_v2/frame_of_reference_page_test.cpp - olap/rowset/segment_v2/block_bloom_filter_test.cpp - olap/rowset/segment_v2/bloom_filter_index_reader_writer_test.cpp - olap/rowset/segment_v2/zone_map_index_test.cpp - olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp - olap/tablet_meta_test.cpp - olap/tablet_meta_manager_test.cpp - olap/tablet_mgr_test.cpp - olap/tablet_test.cpp - olap/rowset/rowset_meta_manager_test.cpp - olap/rowset/rowset_meta_test.cpp - olap/rowset/beta_rowset_test.cpp - olap/rowset/unique_rowset_id_generator_test.cpp - olap/rowset/rowset_tree_test.cpp - olap/txn_manager_test.cpp - olap/key_coder_test.cpp - olap/short_key_index_test.cpp - olap/primary_key_index_test.cpp - olap/page_cache_test.cpp - olap/hll_test.cpp - olap/selection_vector_test.cpp - olap/block_column_predicate_test.cpp - olap/options_test.cpp - olap/common_test.cpp - olap/tablet_cooldown_test.cpp - olap/rowid_conversion_test.cpp - olap/remote_rowset_gc_test.cpp - #olap/segcompaction_test.cpp - olap/ordered_data_compaction_test.cpp - olap/delete_bitmap_calculator_test.cpp -) -set(RUNTIME_TEST_FILES - # runtime/result_buffer_mgr_test.cpp - # runtime/parallel_executor_test.cpp - # runtime/dpp_sink_test.cpp - # runtime/export_task_mgr_test.cpp - runtime/arena_test.cpp - runtime/decimalv2_value_test.cpp - runtime/large_int_value_test.cpp - runtime/string_value_test.cpp - runtime/fragment_mgr_test.cpp - runtime/mem_limit_test.cpp - runtime/snapshot_loader_test.cpp - runtime/user_function_cache_test.cpp - runtime/routine_load_task_executor_test.cpp - runtime/small_file_mgr_test.cpp - runtime/heartbeat_flags_test.cpp - runtime/result_queue_mgr_test.cpp - runtime/test_env.cc - runtime/external_scan_context_mgr_test.cpp - runtime/memory/chunk_allocator_test.cpp - runtime/memory/system_allocator_test.cpp - runtime/cache/partition_cache_test.cpp - #runtime/array_test.cpp -) -set(TESTUTIL_TEST_FILES - testutil/test_util.cpp - testutil/desc_tbl_builder.cc - testutil/function_utils.cpp - testutil/run_all_tests.cpp -) - -set(UTIL_TEST_FILES - util/bit_util_test.cpp - util/brpc_client_cache_test.cpp - util/path_trie_test.cpp - util/coding_test.cpp - util/crc32c_test.cpp - util/lru_cache_util_test.cpp - util/cidr_test.cpp - util/metrics_test.cpp - util/doris_metrics_test.cpp - util/system_metrics_test.cpp - util/string_util_test.cpp - util/string_parser_test.cpp - util/core_local_test.cpp - util/byte_buffer2_test.cpp - util/uid_util_test.cpp - util/encryption_util_test.cpp - util/md5_test.cpp - util/sm3_test.cpp - util/bitmap_test.cpp - util/bitmap_value_test.cpp - util/faststring_test.cpp - util/rle_encoding_test.cpp - util/tdigest_test.cpp - util/block_compression_test.cpp - util/frame_of_reference_coding_test.cpp - util/bit_stream_utils_test.cpp - util/radix_sort_test.cpp - util/utf8_check_test.cpp - util/cgroup_util_test.cpp - util/path_util_test.cpp - util/parse_util_test.cpp - util/countdown_latch_test.cpp - util/scoped_cleanup_test.cpp - util/thread_test.cpp - util/threadpool_test.cpp - util/mysql_row_buffer_test.cpp - util/easy_json-test.cpp - util/http_channel_test.cpp - util/histogram_test.cpp - util/s3_uri_test.cpp - util/sort_heap_test.cpp - util/counts_test.cpp - util/date_func_test.cpp - util/quantile_state_test.cpp - util/interval_tree_test.cpp - util/key_util_test.cpp -) if (OS_MACOSX) - list(REMOVE_ITEM UTIL_TEST_FILES util/system_metrics_test.cpp) + list(REMOVE_ITEM UT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/util/system_metrics_test.cpp) endif() -set(VEC_TEST_FILES - vec/aggregate_functions/agg_collect_test.cpp - vec/aggregate_functions/agg_histogram_test.cpp - vec/aggregate_functions/agg_test.cpp - vec/aggregate_functions/agg_min_max_test.cpp - vec/aggregate_functions/agg_replace_test.cpp - vec/aggregate_functions/vec_window_funnel_test.cpp - vec/aggregate_functions/vec_retention_test.cpp - vec/aggregate_functions/vec_sequence_match_test.cpp - vec/aggregate_functions/agg_min_max_by_test.cpp - vec/columns/column_decimal_test.cpp - vec/columns/column_fixed_length_object_test.cpp - vec/data_types/complex_type_test.cpp - vec/data_types/serde/data_type_to_string_test.cpp - vec/data_types/serde/data_type_serde_pb_test.cpp - vec/data_types/serde/data_type_serde_arrow_test.cpp - vec/data_types/serde/data_type_serde_mysql_test.cpp - vec/core/block_test.cpp - vec/core/block_spill_test.cpp - vec/core/column_array_test.cpp - vec/core/column_complex_test.cpp - vec/core/column_nullable_test.cpp - vec/core/column_vector_test.cpp - vec/exec/vgeneric_iterators_test.cpp - vec/exec/vtablet_sink_test.cpp - vec/exprs/vexpr_test.cpp - vec/function/function_array_aggregation_test.cpp - vec/function/function_array_element_test.cpp - vec/function/function_array_index_test.cpp - vec/function/function_array_size_test.cpp - vec/function/function_arrays_overlap_test.cpp - vec/function/function_bitmap_test.cpp - vec/function/function_hash_test.cpp - vec/function/function_math_test.cpp - vec/function/function_string_test.cpp - vec/function/function_time_test.cpp - vec/function/function_ifnull_test.cpp - vec/function/function_nullif_test.cpp - vec/function/function_like_test.cpp - vec/function/function_arithmetic_test.cpp - vec/function/function_json_test.cpp - vec/function/function_jsonb_test.cpp - vec/function/function_geo_test.cpp - vec/function/function_test_util.cpp - vec/function/function_url_test.cpp - vec/function/table_function_test.cpp - vec/function/function_running_difference_test.cpp - vec/runtime/vdata_stream_test.cpp - vec/runtime/vdatetime_value_test.cpp - vec/utils/arrow_column_to_doris_column_test.cpp - vec/utils/histogram_helpers_test.cpp - vec/olap/char_type_padding_test.cpp - vec/olap/vertical_compaction_test.cpp - vec/jsonb/serialize_test.cpp -) -add_executable(doris_be_test - ${AGENT_TEST_FILES} - ${COMMON_TEST_FILES} - ${EXEC_TEST_FILES} - ${EXPRS_TEST_FILES} - ${GEO_TEST_FILES} - ${GUTIL_TEST_FILES} - ${HTTP_TEST_FILES} - ${IO_TEST_FILES} - ${OLAP_TEST_FILES} - ${RUNTIME_TEST_FILES} - ${TESTUTIL_TEST_FILES} - ${UTIL_TEST_FILES} - ${VEC_TEST_FILES} +list(REMOVE_ITEM UT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark_tool.cpp) + +# todo: need fix those ut +list(REMOVE_ITEM UT_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/agent/heartbeat_server_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/common/config_validator_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/http/metrics_action_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/olap/rowset/segment_v2/binary_dict_page_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/olap/rowset/segment_v2/binary_plain_page_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/olap/rowset/segment_v2/binary_prefix_page_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/olap/rowset/segment_v2/bitshuffle_page_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/olap/rowset/segment_v2/column_reader_writer_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/olap/rowset/segment_v2/frame_of_reference_page_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/olap/rowset/segment_v2/plain_page_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/olap/rowset/segment_v2/rle_page_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/olap/segcompaction_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/runtime/decimal_value_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/runtime/result_buffer_mgr_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/util/decompress_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/util/runtime_profile_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/util/url_coding_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/exprs/hybrid_set_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/io/fs/remote_file_system_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/olap/remote_rowset_gc_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/runtime/jsonb_value_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/runtime/large_int_value_test.cpp ) +add_executable(doris_be_test ${UT_FILES}) + target_link_libraries(doris_be_test ${TEST_LINK_LIBS}) set_target_properties(doris_be_test PROPERTIES COMPILE_FLAGS "-fno-access-control") diff --git a/be/test/agent/agent_server_test.cpp b/be/test/agent/agent_server_test.cpp deleted file mode 100644 index 40988b2879..0000000000 --- a/be/test/agent/agent_server_test.cpp +++ /dev/null @@ -1,178 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "agent/agent_server.h" - -#include "gen_cpp/AgentService_types.h" -#include "gen_cpp/HeartbeatService_types.h" -#include "gen_cpp/Types_types.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "olap/mock_command_executor.h" - -using ::testing::_; -using ::testing::Return; -using ::testing::SetArgPointee; -using std::string; -using std::vector; - -namespace doris { - -TEST(SubmitTasksTest, TestSubmitTasks) { - TAgentResult return_value; - std::vector tasks; - - ExecEnv env; - TMasterInfo master_info; - TNetworkAddress network_address; - AgentServer agent_server(&env, master_info); - - // Master info not init - agent_server.submit_tasks(return_value, tasks); - EXPECT_EQ(TStatusCode::CANCELLED, return_value.status.status_code); - EXPECT_STREQ("Not get master heartbeat yet.", return_value.status.error_msgs[0].c_str()); - - // Master info inited, type invalid - master_info.network_address.hostname = "host name"; - master_info.network_address.port = 1234; - TAgentTaskRequest task; - task.task_type = TTaskType::CREATE; - tasks.push_back(task); - TAgentResult return_value1; - agent_server.submit_tasks(return_value1, tasks); - EXPECT_EQ(TStatusCode::ANALYSIS_ERROR, return_value1.status.status_code); - - // Master info inited, submit task - tasks.clear(); - TAgentTaskRequest create_tablet_task; - TCreateTabletReq create_tablet_req; - create_tablet_task.task_type = TTaskType::CREATE; - create_tablet_task.__set_create_tablet_req(create_tablet_req); - tasks.push_back(create_tablet_task); - TAgentTaskRequest drop_tablet_task; - TDropTabletReq drop_tablet_req; - drop_tablet_task.task_type = TTaskType::DROP; - drop_tablet_task.__set_drop_tablet_req(drop_tablet_req); - tasks.push_back(drop_tablet_task); - TAgentTaskRequest alter_tablet_task; - TAlterTabletReq alter_tablet_req; - alter_tablet_task.task_type = TTaskType::ROLLUP; - alter_tablet_task.__set_alter_tablet_req(alter_tablet_req); - tasks.push_back(alter_tablet_task); - TAgentTaskRequest clone_task; - TCloneReq clone_req; - clone_task.task_type = TTaskType::CLONE; - clone_task.__set_clone_req(clone_req); - tasks.push_back(clone_task); - TAgentTaskRequest push_task; - TPushReq push_req; - push_task.task_type = TTaskType::PUSH; - push_task.__set_push_req(push_req); - tasks.push_back(push_task); - TAgentTaskRequest cancel_delete_task; - TCancelDeleteDataReq cancel_delete_data_req; - cancel_delete_task.task_type = TTaskType::CANCEL_DELETE; - cancel_delete_task.__set_cancel_delete_data_req(cancel_delete_data_req); - tasks.push_back(cancel_delete_task); - TAgentTaskRequest upload_task; - TUploadReq upload_req; - upload_task.task_type = TTaskType::UPLOAD; - upload_task.__set_upload_req(upload_req); - tasks.push_back(upload_task); - TAgentTaskRequest make_snapshot_task; - TSnapshotRequest snapshot_req; - make_snapshot_task.task_type = TTaskType::MAKE_SNAPSHOT; - make_snapshot_task.__set_snapshot_req(snapshot_req); - tasks.push_back(make_snapshot_task); - TAgentTaskRequest release_snapshot_task; - TReleaseSnapshotRequest release_snapshot_req; - release_snapshot_task.task_type = TTaskType::RELEASE_SNAPSHOT; - release_snapshot_task.__set_release_snapshot_req(release_snapshot_req); - tasks.push_back(release_snapshot_task); - TAgentResult return_value2; - agent_server.submit_tasks(return_value2, tasks); - EXPECT_EQ(TStatusCode::OK, return_value2.status.status_code); - EXPECT_EQ(0, return_value2.status.error_msgs.size()); -} - -TEST(MakeSnapshotTest, TestMakeSnapshot) { - TAgentResult return_value; - TSnapshotRequest snapshot_request; - snapshot_request.tablet_id = 1; - snapshot_request.schema_hash = 12345678; - string snapshot_path; - TMasterInfo master_info; - - ExecEnv env; - CommandExecutor* tmp; - MockCommandExecutor mock_command_executor; - AgentServer agent_server(&env, master_info); - tmp = agent_server._command_executor; - agent_server._command_executor = &mock_command_executor; - - EXPECT_CALL(mock_command_executor, make_snapshot(_, _)) - .Times(1) - .WillOnce(DoAll(SetArgPointee<1>("snapshot path"), Return(Status::OK()))); - agent_server.make_snapshot(return_value, snapshot_request); - - EXPECT_EQ(TStatusCode::OK, return_value.status.status_code); - EXPECT_STREQ("snapshot path", return_value.snapshot_path.c_str()); - - TAgentResult return_value2; - EXPECT_CALL(mock_command_executor, make_snapshot(_, _)) - .Times(1) - .WillOnce(Return(Status::Error())); - agent_server.make_snapshot(return_value2, snapshot_request); - - EXPECT_EQ(TStatusCode::RUNTIME_ERROR, return_value2.status.status_code); - - agent_server._command_executor = tmp; -} - -TEST(ReleaseSnapshotTest, TestReleaseSnapshot) { - TAgentResult return_value; - string snapshot_path = "snapshot path"; - TMasterInfo master_info; - - CommandExecutor* tmp; - MockCommandExecutor mock_command_executor; - ExecEnv env; - AgentServer agent_server(&env, master_info); - tmp = agent_server._command_executor; - agent_server._command_executor = &mock_command_executor; - - EXPECT_CALL(mock_command_executor, release_snapshot(snapshot_path)) - .Times(1) - .WillOnce(Return(Status::OK())); - agent_server.release_snapshot(return_value, snapshot_path); - - EXPECT_EQ(TStatusCode::OK, return_value.status.status_code); - EXPECT_EQ(0, return_value.status.error_msgs.size()); - - TAgentResult return_value2; - EXPECT_CALL(mock_command_executor, release_snapshot(snapshot_path)) - .Times(1) - .WillOnce(Return(Status::Error())); - agent_server.release_snapshot(return_value2, snapshot_path); - - EXPECT_EQ(TStatusCode::RUNTIME_ERROR, return_value2.status.status_code); - EXPECT_EQ(1, return_value2.status.error_msgs.size()); - - agent_server._command_executor = tmp; -} - -} // namespace doris diff --git a/be/test/io/fs/remote_file_system_test.cpp b/be/test/io/fs/remote_file_system_test.cpp index d63f19e209..786bc3e4f3 100644 --- a/be/test/io/fs/remote_file_system_test.cpp +++ b/be/test/io/fs/remote_file_system_test.cpp @@ -80,11 +80,6 @@ static std::string broker_ip = "127.0.0.1"; static int broker_port = 8008; static std::string broker_location = "hdfs://my_nameservice/user/doris"; -// commend out to enable specified test -#define TestHdfsFileSystem DISABLED_TestHdfsFileSystem -#define TestS3FileSystem DISABLED_TestS3FileSystem -#define TestBrokerFileSystem DISABLED_TestBrokerFileSystem - class RemoteFileSystemTest : public testing::Test { public: virtual void SetUp() { diff --git a/be/test/olap/memtable_flush_executor_test.cpp b/be/test/olap/memtable_flush_executor_test.cpp index 1f26a4a307..50d6003088 100644 --- a/be/test/olap/memtable_flush_executor_test.cpp +++ b/be/test/olap/memtable_flush_executor_test.cpp @@ -40,7 +40,7 @@ namespace doris { -StorageEngine* k_engine = nullptr; +inline StorageEngine* k_engine = nullptr; MemTableFlushExecutor* k_flush_executor = nullptr; void set_up() { @@ -75,44 +75,14 @@ void tear_down() { Schema create_schema() { std::vector col_schemas; - col_schemas.emplace_back(OLAP_FIELD_AGGREGATION_NONE, FieldType::OLAP_FIELD_TYPE_SMALLINT, - true); - col_schemas.emplace_back(OLAP_FIELD_AGGREGATION_NONE, FieldType::OLAP_FIELD_TYPE_INT, true); - col_schemas.emplace_back(OLAP_FIELD_AGGREGATION_SUM, FieldType::OLAP_FIELD_TYPE_BIGINT, true); + col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, + FieldType::OLAP_FIELD_TYPE_SMALLINT, true); + col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, + FieldType::OLAP_FIELD_TYPE_INT, true); + col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM, + FieldType::OLAP_FIELD_TYPE_BIGINT, true); Schema schema(col_schemas, 2); return schema; } -class TestMemTableFlushExecutor : public ::testing::Test { -public: - TestMemTableFlushExecutor() {} - ~TestMemTableFlushExecutor() {} -}; - -TEST_F(TestMemTableFlushExecutor, create_flush_handler) { - std::vector data_dir = k_engine->get_stores(); - size_t path_hash = data_dir[0]->path_hash(); - - std::shared_ptr flush_handler; - k_flush_executor->create_flush_handler(path_hash, &flush_handler); - EXPECT_NE(nullptr, flush_handler.get()); - - FlushResult res; - res.flush_status = Status::OK(); - res.flush_time_ns = 100; - flush_handler->on_flush_finished(res); - EXPECT_FALSE(flush_handler->is_cancelled()); - EXPECT_EQ(100, flush_handler->get_stats().flush_time_ns); - EXPECT_EQ(1, flush_handler->get_stats().flush_count); - - FlushResult res2; - res2.flush_status = Status::Error(); - flush_handler->on_flush_finished(res2); - EXPECT_TRUE(flush_handler->is_cancelled()); - EXPECT_EQ(100, flush_handler->get_stats().flush_time_ns); - EXPECT_EQ(1, flush_handler->get_stats().flush_count); - - EXPECT_EQ(Status::Error(), flush_handler->wait()); -} - } // namespace doris diff --git a/be/test/olap/remote_rowset_gc_test.cpp b/be/test/olap/remote_rowset_gc_test.cpp index 35c7e4b5cc..49a70b56a5 100644 --- a/be/test/olap/remote_rowset_gc_test.cpp +++ b/be/test/olap/remote_rowset_gc_test.cpp @@ -58,7 +58,6 @@ #include "util/s3_util.h" namespace doris { -class DISABLED_RemoteRowsetGcTest; class OlapMeta; static StorageEngine* k_engine = nullptr; @@ -67,8 +66,6 @@ static const std::string kTestDir = "./ut_dir/remote_rowset_gc_test"; static constexpr int64_t kResourceId = 10000; static constexpr int64_t kStoragePolicyId = 10002; -// remove DISABLED_ when need run this test -#define RemoteRowsetGcTest DISABLED_RemoteRowsetGcTest class RemoteRowsetGcTest : public testing::Test { public: static void SetUpTestSuite() { diff --git a/be/test/olap/row_cursor_test.cpp b/be/test/olap/row_cursor_test.cpp index 41a3fbc4ed..a0b2c162d2 100644 --- a/be/test/olap/row_cursor_test.cpp +++ b/be/test/olap/row_cursor_test.cpp @@ -329,44 +329,4 @@ TEST_F(TestRowCursor, InitRowCursorWithScanKey) { EXPECT_TRUE(strncmp(tuple2.get_value(1).c_str(), "0&varchar_exceed_length", 23)); } -TEST_F(TestRowCursor, FullKeyCmp) { - TabletSchemaSPtr tablet_schema = std::make_shared(); - set_tablet_schema_for_cmp_and_aggregate(tablet_schema); - - RowCursor left; - Status res = left.init(tablet_schema); - EXPECT_EQ(res, Status::OK()); - EXPECT_EQ(left.get_fixed_len(), 78); - EXPECT_EQ(left.get_variable_len(), 20); - - Slice l_char("well"); - int32_t l_int = 10; - left.set_field_content(0, reinterpret_cast(&l_char), _arena.get()); - left.set_field_content(1, reinterpret_cast(&l_int), _arena.get()); - - RowCursor right_eq; - res = right_eq.init(tablet_schema); - Slice r_char_eq("well"); - int32_t r_int_eq = 10; - right_eq.set_field_content(0, reinterpret_cast(&r_char_eq), _arena.get()); - right_eq.set_field_content(1, reinterpret_cast(&r_int_eq), _arena.get()); - EXPECT_EQ(compare_row(left, right_eq), 0); - - RowCursor right_lt; - res = right_lt.init(tablet_schema); - Slice r_char_lt("well"); - int32_t r_int_lt = 11; - right_lt.set_field_content(0, reinterpret_cast(&r_char_lt), _arena.get()); - right_lt.set_field_content(1, reinterpret_cast(&r_int_lt), _arena.get()); - EXPECT_LT(compare_row(left, right_lt), 0); - - RowCursor right_gt; - res = right_gt.init(tablet_schema); - Slice r_char_gt("good"); - int32_t r_int_gt = 10; - right_gt.set_field_content(0, reinterpret_cast(&r_char_gt), _arena.get()); - right_gt.set_field_content(1, reinterpret_cast(&r_int_gt), _arena.get()); - EXPECT_GT(compare_row(left, right_gt), 0); -} - } // namespace doris diff --git a/be/test/olap/rowset/beta_rowset_test.cpp b/be/test/olap/rowset/beta_rowset_test.cpp index 80e540f8ba..ab984423e8 100644 --- a/be/test/olap/rowset/beta_rowset_test.cpp +++ b/be/test/olap/rowset/beta_rowset_test.cpp @@ -73,7 +73,7 @@ namespace doris { using namespace ErrorCode; static const uint32_t MAX_PATH_LEN = 1024; -StorageEngine* k_engine = nullptr; +inline StorageEngine* k_engine = nullptr; static const std::string kTestDir = "./data_test/data/beta_rowset_test"; class BetaRowsetTest : public testing::Test { diff --git a/be/test/runtime/export_task_mgr_test.cpp b/be/test/runtime/export_task_mgr_test.cpp deleted file mode 100644 index 07f7a28683..0000000000 --- a/be/test/runtime/export_task_mgr_test.cpp +++ /dev/null @@ -1,216 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "runtime/export_task_mgr.h" - -#include - -#include "gen_cpp/BackendService.h" -#include "gen_cpp/Types_types.h" -#include "runtime/exec_env.h" -#include "runtime/fragment_mgr.h" -#include "util/cpu_info.h" -#include "util/disk_info.h" - -namespace doris { - -// Mock fragment mgr -Status FragmentMgr::exec_plan_fragment(const TExecPlanFragmentParams& params, FinishCallback cb) { - return Status::OK(); -} - -FragmentMgr::FragmentMgr(ExecEnv* exec_env) : _thread_pool(10, 128) {} - -FragmentMgr::~FragmentMgr() {} - -void FragmentMgr::debug(std::stringstream& ss) {} - -class ExportTaskMgrTest : public testing::Test { -public: - ExportTaskMgrTest() {} - -private: - ExecEnv _exec_env; -}; - -TEST_F(ExportTaskMgrTest, NormalCase) { - ExportTaskMgr mgr(&_exec_env); - TUniqueId id; - id.hi = 1; - id.lo = 2; - - TExportStatusResult res; - TExportTaskRequest req; - req.params.params.fragment_instance_id = id; - - // make it running - EXPECT_TRUE(mgr.start_task(req).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::RUNNING, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); - - // make it finishing - ExportTaskResult task_result; - task_result.files.push_back("path/file1"); - EXPECT_TRUE(mgr.finish_task(id, Status::OK(), task_result).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::FINISHED, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); - EXPECT_EQ(1, res.files.size()); - - // erase it - EXPECT_TRUE(mgr.erase_task(id).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::CANCELLED, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); -} - -TEST_F(ExportTaskMgrTest, DuplicateCase) { - ExportTaskMgr mgr(&_exec_env); - TUniqueId id; - id.lo = 1; - id.hi = 1; - - TExportStatusResult res; - TExportTaskRequest req; - req.params.params.fragment_instance_id = id; - - // make it running - EXPECT_TRUE(mgr.start_task(req).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::RUNNING, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); - - // Put it twice - EXPECT_TRUE(mgr.start_task(req).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::RUNNING, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); -} - -TEST_F(ExportTaskMgrTest, RunAfterSuccess) { - ExportTaskMgr mgr(&_exec_env); - TUniqueId id; - id.lo = 1; - id.hi = 1; - - TExportStatusResult res; - TExportTaskRequest req; - req.params.params.fragment_instance_id = id; - - // make it running - EXPECT_TRUE(mgr.start_task(req).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::RUNNING, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); - - // make it finishing - ExportTaskResult task_result; - task_result.files.push_back("path/file1"); - EXPECT_TRUE(mgr.finish_task(id, Status::OK(), task_result).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::FINISHED, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); - EXPECT_EQ(1, res.files.size()); - EXPECT_EQ("path/file1", res.files[0]); - - // Put it twice - EXPECT_TRUE(mgr.start_task(req).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::FINISHED, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); - EXPECT_EQ(1, res.files.size()); - EXPECT_EQ("path/file1", res.files[0]); -} - -TEST_F(ExportTaskMgrTest, RunAfterFail) { - ExportTaskMgr mgr(&_exec_env); - TUniqueId id; - id.lo = 1; - id.hi = 1; - - TExportStatusResult res; - TExportTaskRequest req; - req.params.params.fragment_instance_id = id; - - // make it running - EXPECT_TRUE(mgr.start_task(req).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::RUNNING, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); - - // make it finishing - ExportTaskResult task_result; - EXPECT_TRUE(mgr.finish_task(id, Status::RpcError("Thrift rpc error"), task_result).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::CANCELLED, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); - - // Put it twice - EXPECT_TRUE(mgr.start_task(req).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::RUNNING, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); -} - -TEST_F(ExportTaskMgrTest, CancelJob) { - ExportTaskMgr mgr(&_exec_env); - TUniqueId id; - id.lo = 1; - id.hi = 1; - - TExportStatusResult res; - TExportTaskRequest req; - req.params.params.fragment_instance_id = id; - - // make it running - EXPECT_TRUE(mgr.start_task(req).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::RUNNING, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); - - // make it finishing - ExportTaskResult task_result; - EXPECT_TRUE(mgr.cancel_task(id).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::CANCELLED, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); - - // Put it twice - EXPECT_TRUE(mgr.start_task(req).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::RUNNING, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); -} - -TEST_F(ExportTaskMgrTest, FinishUnknownJob) { - ExportTaskMgr mgr(&_exec_env); - TUniqueId id; - id.lo = 1; - id.hi = 1; - - TExportStatusResult res; - - // make it finishing - ExportTaskResult task_result; - EXPECT_FALSE(mgr.finish_task(id, Status::RpcError("Thrift rpc error"), task_result).ok()); - EXPECT_TRUE(mgr.get_task_state(id, &res).ok()); - EXPECT_EQ(TExportState::CANCELLED, res.state); - EXPECT_EQ(TStatusCode::OK, res.status.status_code); -} - -} // namespace doris diff --git a/be/test/runtime/jsonb_value_test.cpp b/be/test/runtime/jsonb_value_test.cpp index ea68b2057a..f1a8cbda9a 100644 --- a/be/test/runtime/jsonb_value_test.cpp +++ b/be/test/runtime/jsonb_value_test.cpp @@ -21,27 +21,24 @@ #include -#include "util/cpu_info.h" - using std::string; namespace doris { -JsonBinaryValue FromStdString(const string& str) { +inline JsonBinaryValue FromStdString(const string& str) { char* ptr = const_cast(str.c_str()); int len = str.size(); return JsonBinaryValue(ptr, len); } TEST(JsonBinaryValueTest, TestValidation) { - JsonbErrType err; JsonBinaryValue json_val; // single value not wrapped as an arrar or object is invalid std::vector invalid_strs = {"", "1", "null", "false", "abc"}; for (size_t i = 0; i < invalid_strs.size(); i++) { - err = json_val.from_json_string(invalid_strs[i].c_str(), invalid_strs[i].size()); - EXPECT_NE(err, JsonbErrType::E_NONE); + auto status = json_val.from_json_string(invalid_strs[i].c_str(), invalid_strs[i].size()); + EXPECT_TRUE(status.ok()); } // valid enums @@ -53,8 +50,8 @@ TEST(JsonBinaryValueTest, TestValidation) { valid_strs.push_back("{\"key1\": \"js6\", \"key2\": [\"val1\", \"val2\"]}"); valid_strs.push_back("[123, {\"key1\": null, \"key2\": [\"val1\", \"val2\"]}]"); for (size_t i = 0; i < valid_strs.size(); i++) { - err = json_val.from_json_string(valid_strs[i].c_str(), valid_strs[i].size()); - EXPECT_EQ(err, JsonbErrType::E_NONE); + auto status = json_val.from_json_string(valid_strs[i].c_str(), valid_strs[i].size()); + EXPECT_TRUE(status.ok()); } } } // namespace doris \ No newline at end of file diff --git a/be/test/runtime/large_int_value_test.cpp b/be/test/runtime/large_int_value_test.cpp index 0c27d88abb..57cda0731b 100644 --- a/be/test/runtime/large_int_value_test.cpp +++ b/be/test/runtime/large_int_value_test.cpp @@ -89,7 +89,7 @@ TEST_F(LargeIntValueTest, largeint_to_string) { } } -TEST_F(LargeIntValueTest, DISABLED_largeint_to_string_benchmark) { +TEST_F(LargeIntValueTest, largeint_to_string_benchmark) { for (int i = 0; i < 10000000; i++) { __int128 v2 = MAX_INT128; EXPECT_EQ(LargeIntValue::to_string(v2), "170141183460469231731687303715884105727"); diff --git a/be/test/testutil/desc_tbl_builder.cc b/be/test/testutil/desc_tbl_builder.cpp similarity index 100% rename from be/test/testutil/desc_tbl_builder.cc rename to be/test/testutil/desc_tbl_builder.cpp diff --git a/be/test/tools/benchmark_tool.cpp b/be/test/tools/benchmark_tool.cpp index 1d9dcc3616..575bd11484 100644 --- a/be/test/tools/benchmark_tool.cpp +++ b/be/test/tools/benchmark_tool.cpp @@ -95,8 +95,6 @@ std::string get_usage(const std::string& progname) { return ss.str(); } -static int seg_id = 0; - namespace doris { class BaseBenchmark { public: @@ -231,270 +229,6 @@ private: int _rows_number; }; // namespace doris -class SegmentBenchmark : public BaseBenchmark { -public: - SegmentBenchmark(const std::string& name, int iterations, const std::string& column_type) - : BaseBenchmark(name, iterations), _pool() { - io::global_local_filesystem()->delete_and_create_directory(kSegmentDir); - init_schema(column_type); - } - SegmentBenchmark(const std::string& name, int iterations) - : BaseBenchmark(name, iterations), _pool() { - io::global_local_filesystem()->delete_and_create_directory(kSegmentDir); - } - virtual ~SegmentBenchmark() override { - io::global_local_filesystem()->delete_directory(kSegmentDir); - } - - const Schema& get_schema() { return *_schema; } - - virtual void init() override {} - virtual void run() override {} - - void init_schema(const std::string& column_type) { - std::string column_valid = "/column_type:"; - - std::vector tokens = strings::Split(column_type, ","); - std::vector columns; - - bool first_column = true; - for (auto token : tokens) { - bool valid = true; - - if (equal_ignore_case(token, "int")) { - columns.emplace_back(create_int_key(columns.size() + 1)); - } else if (equal_ignore_case(token, "char")) { - columns.emplace_back(create_char_key(columns.size() + 1)); - } else if (equal_ignore_case(token, "varchar")) { - columns.emplace_back(create_varchar_key(columns.size() + 1)); - } else if (equal_ignore_case(token, "string")) { - columns.emplace_back(create_string_key(columns.size() + 1)); - } else { - valid = false; - } - - if (valid) { - if (first_column) { - first_column = false; - } else { - column_valid += ','; - } - column_valid += token; - } - } - - _tablet_schema = _create_schema(columns); - _schema = std::make_shared(_tablet_schema); - - add_name(column_valid); - } - - void build_segment(std::vector> dataset, - std::shared_ptr* res) { - // must use unique filename for each segment, otherwise page cache kicks in and produces - // the wrong answer (it use (filename,offset) as cache key) - std::string filename = fmt::format("seg_{}.dat", seg_id++); - std::string path = fmt::format("{}/{}", kSegmentDir, filename); - auto fs = io::global_local_filesystem(); - - io::FileWriterPtr file_writer; - fs->create_file(path, &file_writer); - SegmentWriterOptions opts; - DataDir data_dir(kSegmentDir); - data_dir.init(); - SegmentWriter writer(file_writer.get(), 0, &_tablet_schema, &data_dir, INT32_MAX, opts); - writer.init(1024); - - RowCursor row; - row.init(_tablet_schema); - - for (auto tokens : dataset) { - for (int cid = 0; cid < _tablet_schema.num_columns(); ++cid) { - RowCursorCell cell = row.cell(cid); - set_column_value_by_type(_tablet_schema._cols[cid]._type, tokens[cid], - (char*)cell.mutable_cell_ptr(), &_pool, - _tablet_schema._cols[cid]._length); - } - writer.append_row(row); - } - - uint64_t file_size, index_size; - writer.finalize(&file_size, &index_size); - file_writer->close(); - - io::FileReaderOptions reader_options(io::FileCachePolicy::NO_CACHE, - io::SegmentCachePathPolicy()); - Segment::open(fs, path, seg_id, {}, &_tablet_schema, reader_options, res); - } - - std::vector> generate_dataset(int rows_number) { - std::vector> dataset; - while (rows_number--) { - std::vector row_data; - for (int cid = 0; cid < _tablet_schema.num_columns(); ++cid) { - row_data.emplace_back(rand_rng_by_type(_tablet_schema._cols[cid]._type)); - } - dataset.emplace_back(row_data); - } - return dataset; - } - -private: - TabletSchema _create_schema(const std::vector& columns, - int num_short_key_columns = -1) { - TabletSchema res; - int num_key_columns = 0; - for (auto& col : columns) { - if (col.is_key()) { - num_key_columns++; - } - res._cols.push_back(col); - } - res._num_columns = columns.size(); - res._num_key_columns = num_key_columns; - res._num_short_key_columns = - num_short_key_columns != -1 ? num_short_key_columns : num_key_columns; - res.init_field_index_for_test(); - return res; - } - -private: - vectorized::Arena _pool; - TabletSchema _tablet_schema; - std::shared_ptr _schema; -}; // namespace doris - -class SegmentWriteBenchmark : public SegmentBenchmark { -public: - SegmentWriteBenchmark(const std::string& name, int iterations, const std::string& column_type, - int rows_number) - : SegmentBenchmark(name + "/rows_number:" + std::to_string(rows_number), iterations, - column_type), - _dataset(generate_dataset(rows_number)) {} - virtual ~SegmentWriteBenchmark() override {} - - virtual void init() override {} - virtual void run() override { build_segment(_dataset, &_segment); } - -private: - std::vector> _dataset; - std::shared_ptr _segment; -}; - -class SegmentWriteByFileBenchmark : public SegmentBenchmark { -public: - SegmentWriteByFileBenchmark(const std::string& name, int iterations, - const std::string& file_str) - : SegmentBenchmark(name + "/file_path:" + file_str, iterations) { - std::ifstream file(file_str); - assert(file.is_open()); - - std::string column_type; - std::getline(file, column_type); - init_schema(column_type); - while (file.peek() != EOF) { - std::string row_str; - std::getline(file, row_str); - std::vector tokens = strings::Split(row_str, ","); - assert(tokens.size() == _tablet_schema.num_columns()); - _dataset.push_back(tokens); - } - - add_name("/rows_number:" + std::to_string(_dataset.size())); - } - virtual ~SegmentWriteByFileBenchmark() override {} - - virtual void init() override {} - virtual void run() override { build_segment(_dataset, &_segment); } - -private: - std::vector> _dataset; - std::shared_ptr _segment; -}; - -class SegmentScanBenchmark : public SegmentBenchmark { -public: - SegmentScanBenchmark(const std::string& name, int iterations, const std::string& column_type, - int rows_number) - : SegmentBenchmark(name + "/rows_number:" + std::to_string(rows_number), iterations, - column_type), - _dataset(generate_dataset(rows_number)) {} - virtual ~SegmentScanBenchmark() override {} - - virtual void init() override { build_segment(_dataset, &_segment); } - virtual void run() override { - StorageReadOptions read_opts; - read_opts.stats = &stats; - std::unique_ptr iter; - _segment->new_iterator(get_schema(), read_opts, &iter); - // Need modify this case - /* - RowBlockV2 block(get_schema(), 1024); - - int left = _dataset.size(); - while (left > 0) { - int rows_read = std::min(left, 1024); - block.clear(); - iter->next_batch(&block); - left -= rows_read; - } - */ - } - -private: - std::vector> _dataset; - std::shared_ptr _segment; - OlapReaderStatistics stats; -}; - -class SegmentScanByFileBenchmark : public SegmentBenchmark { -public: - SegmentScanByFileBenchmark(const std::string& name, int iterations, const std::string& file_str) - : SegmentBenchmark(name, iterations) { - std::ifstream file(file_str); - assert(file.is_open()); - - std::string column_type; - std::getline(file, column_type); - init_schema(column_type); - while (file.peek() != EOF) { - std::string row_str; - std::getline(file, row_str); - std::vector tokens = strings::Split(row_str, ","); - assert(tokens.size() == _tablet_schema.num_columns()); - _dataset.push_back(tokens); - } - - add_name("/rows_number:" + std::to_string(_dataset.size())); - } - virtual ~SegmentScanByFileBenchmark() override {} - - virtual void init() override { build_segment(_dataset, &_segment); } - virtual void run() override { - StorageReadOptions read_opts; - read_opts.stats = &stats; - std::unique_ptr iter; - _segment->new_iterator(get_schema(), read_opts, &iter); - // Need modify this case - /* - RowBlockV2 block(get_schema(), 1024); - - int left = _dataset.size(); - while (left > 0) { - int rows_read = std::min(left, 1024); - block.clear(); - iter->next_batch(&block); - left -= rows_read; - } - */ - } - -private: - std::vector> _dataset; - std::shared_ptr _segment; - OlapReaderStatistics stats; -}; - // This is sample custom test. User can write custom test code at custom_init()&custom_run(). // Call method: ./benchmark_tool --operation=Custom class CustomBenchmark : public BaseBenchmark { @@ -552,20 +286,6 @@ public: } else if (equal_ignore_case(FLAGS_operation, "BinaryDictPageDecode")) { benchmarks.emplace_back(new doris::BinaryDictPageDecodeBenchmark( FLAGS_operation, std::stoi(FLAGS_iterations), std::stoi(FLAGS_rows_number))); - } else if (equal_ignore_case(FLAGS_operation, "SegmentScan")) { - benchmarks.emplace_back(new doris::SegmentScanBenchmark( - FLAGS_operation, std::stoi(FLAGS_iterations), FLAGS_column_type, - std::stoi(FLAGS_rows_number))); - } else if (equal_ignore_case(FLAGS_operation, "SegmentWrite")) { - benchmarks.emplace_back(new doris::SegmentWriteBenchmark( - FLAGS_operation, std::stoi(FLAGS_iterations), FLAGS_column_type, - std::stoi(FLAGS_rows_number))); - } else if (equal_ignore_case(FLAGS_operation, "SegmentScanByFile")) { - benchmarks.emplace_back(new doris::SegmentScanByFileBenchmark( - FLAGS_operation, std::stoi(FLAGS_iterations), FLAGS_input_file)); - } else if (equal_ignore_case(FLAGS_operation, "SegmentWriteByFile")) { - benchmarks.emplace_back(new doris::SegmentWriteByFileBenchmark( - FLAGS_operation, std::stoi(FLAGS_iterations), FLAGS_input_file)); } else { std::cout << "operation invalid!" << std::endl; } diff --git a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp index dd09b373b7..628b977a2b 100644 --- a/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_pb_test.cpp @@ -51,17 +51,17 @@ namespace doris::vectorized { -void column_to_pb(const DataTypePtr data_type, const IColumn& col, PValues* result) { +inline void column_to_pb(const DataTypePtr data_type, const IColumn& col, PValues* result) { const DataTypeSerDeSPtr serde = data_type->get_serde(); serde->write_column_to_pb(col, *result, 0, col.size()); } -void pb_to_column(const DataTypePtr data_type, PValues& result, IColumn& col) { +inline void pb_to_column(const DataTypePtr data_type, PValues& result, IColumn& col) { auto serde = data_type->get_serde(); serde->read_column_from_pb(col, result); } -void check_pb_col(const DataTypePtr data_type, const IColumn& col) { +inline void check_pb_col(const DataTypePtr data_type, const IColumn& col) { PValues pv = PValues(); column_to_pb(data_type, col, &pv); int s1 = pv.bytes_value_size(); @@ -75,7 +75,7 @@ void check_pb_col(const DataTypePtr data_type, const IColumn& col) { EXPECT_EQ(s1, s2); } -void serialize_and_deserialize_pb_test() { +inline void serialize_and_deserialize_pb_test() { std::cout << "==== int32 === " << std::endl; // int { diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp b/be/test/vec/data_types/serde/data_type_serde_test.cpp index 5af25d35cf..cdb7455d26 100644 --- a/be/test/vec/data_types/serde/data_type_serde_test.cpp +++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp @@ -52,17 +52,17 @@ namespace doris::vectorized { -void column_to_pb(const DataTypePtr data_type, const IColumn& col, PValues* result) { +inline void column_to_pb(const DataTypePtr data_type, const IColumn& col, PValues* result) { const DataTypeSerDeSPtr serde = data_type->get_serde(); serde->write_column_to_pb(col, *result, 0, col.size()); } -void pb_to_column(const DataTypePtr data_type, PValues& result, IColumn& col) { +inline void pb_to_column(const DataTypePtr data_type, PValues& result, IColumn& col) { auto serde = data_type->get_serde(); serde->read_column_from_pb(col, result); } -void check_pb_col(const DataTypePtr data_type, const IColumn& col) { +inline void check_pb_col(const DataTypePtr data_type, const IColumn& col) { PValues pv = PValues(); column_to_pb(data_type, col, &pv); std::string s1 = pv.DebugString(); @@ -76,7 +76,7 @@ void check_pb_col(const DataTypePtr data_type, const IColumn& col) { EXPECT_EQ(s1, s2); } -void serialize_and_deserialize_pb_test() { +inline void serialize_and_deserialize_pb_test() { // int { auto vec = vectorized::ColumnVector::create();